Spaces:

Edmond98
/

search

Sleeping

App Files Files Community

Edmond7 committed on Oct 21, 2024

Commit

ab3ef82

verified ·

1 Parent(s): 1749c38

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -197

app.py CHANGED Viewed

@@ -1,7 +1,5 @@
 import os
-import random
-from fastapi import FastAPI, HTTPException, UploadFile, Depends, Query, Request
-from fastapi.responses import JSONResponse
 from fastapi.security import APIKeyHeader
 import aiohttp
 import asyncio
@@ -14,9 +12,6 @@ import boto3
 from botocore.exceptions import NoCredentialsError
 from duckduckgo_search import DDGS
 from bs4 import BeautifulSoup
-import requests
-from innertube import InnerTube
-import ffmpeg
 app = FastAPI()
@@ -45,12 +40,6 @@ S3_REGION = os.environ.get("S3_REGION")
 if not all([API_KEY, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_BUCKET, S3_REGION]):
     raise ValueError("Missing required environment variables")
-# User agents for requests
-USER_AGENTS = [
-    'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
-    'Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
-    'Mozilla/5.0 (iPod touch; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1'
-]
 async def search_and_get_videos(query: str, num_videos: int = 2) -> List[Dict]:
     for instance in INVIDIOUS_INSTANCES:
@@ -76,97 +65,52 @@ async def search_and_get_videos(query: str, num_videos: int = 2) -> List[Dict]:
     logger.error("All Invidious instances failed")
     return []
-def get_youtube_audio(video_id):
-    try:
-        logger.info(f"Starting YouTube audio extraction for video ID: {video_id}")
-        # Create an InnerTube client for iOS
-        client = InnerTube("IOS")
-        logger.info("InnerTube client created")
-        # Fetch video info
-        logger.info("Fetching video info")
-        video_info = client.player(video_id)
-        logger.info("Video info fetched successfully")
-        # Check if the video has streaming data
-        if 'streamingData' not in video_info:
-            logger.error(f"No 'streamingData' found in video info for video ID {video_id}")
-            return {'success': False, 'error': "No streaming data found for the video"}
-        # Extract the audio streams
-        streams = video_info["streamingData"]["adaptiveFormats"]
-        audio_streams = [s for s in streams if s['mimeType'].startswith('audio/')]
-        if not audio_streams:
-            logger.warning(f"No audio streams found for video ID {video_id}")
-            return {'success': False, 'error': "No audio streams found"}
-        # Choose the highest quality audio stream
-        audio_stream = max(audio_streams, key=lambda x: x.get('bitrate', 0))
-        audio_url = audio_stream['url']
-        logger.info(f"Selected audio stream URL: {audio_url[:100]}...")  # Log first 100 chars of URL
-        # Prepare headers
-        headers = {
-            'User-Agent': random.choice(USER_AGENTS),
-            'Accept': '*/*',
-            'Accept-Encoding': 'gzip, deflate, br',
-            'Range': 'bytes=0-',
-            'Connection': 'keep-alive',
-        }
-        # Download the audio
-        logger.info("Starting audio download")
-        response = requests.get(audio_url, headers=headers, stream=True)
-        logger.info(f"Download response status code: {response.status_code}")
-        if response.status_code in [200, 206]:  # 200 OK or 206 Partial Content
-            # Create a temporary file for the downloaded audio
-            with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as temp_file:
-                temp_file_path = temp_file.name
-                logger.info(f"Created temporary file: {temp_file_path}")
-                # Write the audio data to the file
-                total_size = 0
-                for chunk in response.iter_content(chunk_size=8192):
-                    temp_file.write(chunk)
-                    total_size += len(chunk)
-                    if total_size % (1024 * 1024) == 0:  # Log every 1MB
-                        logger.info(f"Downloaded {total_size / (1024 * 1024):.2f} MB")
-            logger.info(f"Audio download completed. Total size: {total_size / (1024 * 1024):.2f} MB")
-            # Convert the downloaded audio to MP3
-            mp3_temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
-            mp3_temp_file_path = mp3_temp_file.name
-            mp3_temp_file.close()
-            logger.info(f"Converting audio to MP3: {mp3_temp_file_path}")
             try:
-                (
-                    ffmpeg
-                    .input(temp_file_path)
-                    .output(mp3_temp_file_path, acodec='libmp3lame', audio_bitrate='128k')
-                    .overwrite_output()
-                    .run(capture_stdout=True, capture_stderr=True)
-                )
-                logger.info("Audio conversion to MP3 completed successfully")
-            except ffmpeg.Error as e:
-                logger.error(f"Error during audio conversion: {e.stderr.decode()}")
-                return {'success': False, 'error': "Failed to convert audio to MP3"}
-            finally:
-                # Remove the original downloaded file
-                os.unlink(temp_file_path)
-            return {'success': True, 'temp_file_path': mp3_temp_file_path}
-        else:
-            logger.error(f"Failed to download audio: HTTP {response.status_code}")
-            return {'success': False, 'error': f"Download failed: HTTP {response.status_code}"}
-    except Exception as e:
-        logger.exception(f"Error fetching YouTube audio for video ID {video_id}: {str(e)}")
-        return {'success': False, 'error': f"Error: {str(e)}"}
 def extract_text_from_document(file: UploadFile) -> dict:
     try:
@@ -198,12 +142,7 @@ def upload_to_s3(local_file, s3_file):
     )
     try:
-        s3_client.upload_file(
-            local_file,
-            S3_BUCKET,
-            s3_file,
-            ExtraArgs={'ContentDisposition': 'attachment'}
-        )
         s3_url = f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com/{s3_file}"
         return s3_url
     except NoCredentialsError:
@@ -243,37 +182,6 @@ async def verify_api_key(api_key: str = Depends(api_key_header)):
         raise HTTPException(status_code=401, detail="Invalid API Key")
     return api_key
-def download_stream(url, output_path):
-    headers = {
-        'User-Agent': random.choice(USER_AGENTS),
-        'Accept': '*/*',
-        'Accept-Encoding': 'gzip, deflate, br',
-        'Range': 'bytes=0-',
-        'Connection': 'keep-alive',
-    }
-    with requests.get(url, headers=headers, stream=True) as r:
-        r.raise_for_status()
-        with open(output_path, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=8192):
-                f.write(chunk)
-def get_best_video_stream(streams):
-    video_streams = [s for s in streams if s['mimeType'].startswith('video/')]
-    # Try to get 720p or the highest quality below 720p
-    preferred_stream = next((s for s in video_streams if s['qualityLabel'] == '720p'), None)
-    if not preferred_stream:
-        # If 720p is not available, get the highest quality below 720p
-        below_720p = [s for s in video_streams if int(s['qualityLabel'][:-1]) < 720]
-        preferred_stream = max(below_720p, key=lambda x: int(x['qualityLabel'][:-1])) if below_720p else None
-    return preferred_stream
-def get_best_audio_stream(streams):
-    audio_streams = [s for s in streams if s['mimeType'].startswith('audio/')]
-    return max(audio_streams, key=lambda x: x['bitrate']) if audio_streams else None
 @app.get("/search-videos/")
 async def search_videos(
     query: str,
@@ -287,11 +195,11 @@ async def search_videos(
 @app.get("/get-audio/{video_id}")
 async def get_audio(video_id: str, api_key: str = Depends(verify_api_key)):
-    result = get_youtube_audio(video_id)
     if not result['success']:
-        raise HTTPException(status_code=500, detail=result['error'])
-    s3_file_name = f"{video_id}.mp3"
     s3_url = upload_to_s3(result['temp_file_path'], s3_file_name)
     if s3_url:
@@ -300,62 +208,6 @@ async def get_audio(video_id: str, api_key: str = Depends(verify_api_key)):
     else:
         raise HTTPException(status_code=500, detail="Failed to upload audio to S3")
-@app.post("/get-video-streams")
-async def get_video_streams(video_id: str, api_key: str = Depends(verify_api_key)):
-    try:
-        # Create an InnerTube client
-        client = InnerTube("WEB")
-        # Fetch video info
-        video_info = client.player(video_id)
-        # Get the best video and audio streams
-        streams = video_info["streamingData"]["adaptiveFormats"]
-        video_stream = get_best_video_stream(streams)
-        audio_stream = get_best_audio_stream(streams)
-        if not video_stream or not audio_stream:
-            raise HTTPException(status_code=404, detail="Could not find suitable video or audio stream")
-        # Download video and audio
-        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as video_file, \
-             tempfile.NamedTemporaryFile(delete=False, suffix='.m4a') as audio_file:
-            download_stream(video_stream['url'], video_file.name)
-            download_stream(audio_stream['url'], audio_file.name)
-            # Combine video and audio
-            output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
-            try:
-                (
-                    ffmpeg
-                    .input(video_file.name)
-                    .input(audio_file.name)
-                    .output(output_file.name, vcodec='libx264', acodec='aac')
-                    .overwrite_output()
-                    .run(capture_stdout=True, capture_stderr=True)
-                )
-            except ffmpeg.Error as e:
-                raise HTTPException(status_code=500, detail=f"Error combining video and audio: {e.stderr.decode()}")
-            # Upload combined video to S3
-            s3_file_name = f"{video_id}_combined.mp4"
-            s3_url = upload_to_s3(output_file.name, s3_file_name)
-            if s3_url:
-                return {"video_url": s3_url}
-            else:
-                raise HTTPException(status_code=500, detail="Failed to upload video to S3")
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error processing video: {str(e)}")
-    finally:
-        # Clean up temporary files
-        for file in [video_file.name, audio_file.name, output_file.name]:
-            if os.path.exists(file):
-                os.unlink(file)
 @app.post("/extract-text/")
 async def extract_text(file: UploadFile, api_key: str = Depends(verify_api_key)):
     result = extract_text_from_document(file)

 import os
+from fastapi import FastAPI, HTTPException, UploadFile, Depends, Query
 from fastapi.security import APIKeyHeader
 import aiohttp
 import asyncio
 from botocore.exceptions import NoCredentialsError
 from duckduckgo_search import DDGS
 from bs4 import BeautifulSoup
 app = FastAPI()
 if not all([API_KEY, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_BUCKET, S3_REGION]):
     raise ValueError("Missing required environment variables")
 async def search_and_get_videos(query: str, num_videos: int = 2) -> List[Dict]:
     for instance in INVIDIOUS_INSTANCES:
     logger.error("All Invidious instances failed")
     return []
+async def get_youtube_audio(video_id: str, max_retries: int = 3) -> Dict:
+    for instance in INVIDIOUS_INSTANCES:
+        for attempt in range(max_retries):
             try:
+                url = f"{instance}/api/v1/videos/{video_id}"
+                async with aiohttp.ClientSession() as session:
+                    async with session.get(url) as response:
+                        response.raise_for_status()
+                        video_data = await response.json()
+                        audio_format = next((format for format in video_data.get('adaptiveFormats', [])
+                                             if format.get('type', '').startswith('audio/mp4')), None)
+                        if audio_format:
+                            audio_url = audio_format.get('url')
+                            if audio_url:
+                                try:
+                                    async with session.get(audio_url) as audio_response:
+                                        audio_content = await audio_response.read()
+                                        with tempfile.NamedTemporaryFile(delete=False, suffix='.m4a') as temp_file:
+                                            temp_file.write(audio_content)
+                                            temp_file_path = temp_file.name
+                                        return {'success': True, 'temp_file_path': temp_file_path}
+                                except aiohttp.ServerDisconnectedError:
+                                    if attempt == max_retries - 1:
+                                        logger.error(f"Max retries reached for video ID {video_id} on {instance}")
+                                        break
+                                    await asyncio.sleep(1 * (attempt + 1))
+                                    continue
+                        logger.warning(f"No suitable audio format found for video ID {video_id} on {instance}")
+                        break
+            except aiohttp.ClientError as e:
+                logger.error(f"Network error fetching YouTube audio for video ID {video_id} on {instance}: {e}")
+            except json.JSONDecodeError:
+                logger.error(f"Error decoding JSON response for video ID {video_id} on {instance}")
+            except Exception as e:
+                logger.error(f"Unexpected error fetching YouTube audio for video ID {video_id} on {instance}: {e}")
+            if attempt == max_retries - 1:
+                break
+            await asyncio.sleep(1 * (attempt + 1))
+    return {'success': False, 'error': "Failed to fetch audio after multiple attempts on all instances"}
 def extract_text_from_document(file: UploadFile) -> dict:
     try:
     )
     try:
+        s3_client.upload_file(local_file, S3_BUCKET, s3_file)
         s3_url = f"https://{S3_BUCKET}.s3.{S3_REGION}.amazonaws.com/{s3_file}"
         return s3_url
     except NoCredentialsError:
         raise HTTPException(status_code=401, detail="Invalid API Key")
     return api_key
 @app.get("/search-videos/")
 async def search_videos(
     query: str,
 @app.get("/get-audio/{video_id}")
 async def get_audio(video_id: str, api_key: str = Depends(verify_api_key)):
+    result = await get_youtube_audio(video_id)
     if not result['success']:
+        raise HTTPException(status_code=404, detail=result['error'])
+    s3_file_name = f"{video_id}.m4a"
     s3_url = upload_to_s3(result['temp_file_path'], s3_file_name)
     if s3_url:
     else:
         raise HTTPException(status_code=500, detail="Failed to upload audio to S3")
 @app.post("/extract-text/")
 async def extract_text(file: UploadFile, api_key: str = Depends(verify_api_key)):
     result = extract_text_from_document(file)