File size: 6,883 Bytes
aa732b3 19cbc25 aa732b3 259d504 19cbc25 259d504 19cbc25 aa732b3 19cbc25 53afe5a 19cbc25 259d504 19cbc25 259d504 19cbc25 259d504 19cbc25 aa732b3 19cbc25 259d504 19cbc25 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 |
import asyncio
import json
import logging
import os
import random
import tempfile
from typing import Dict, List, Optional
from urllib.parse import quote

import aiohttp
import requests
import textract
from fastapi import FastAPI, HTTPException, Security, UploadFile
from fastapi.responses import JSONResponse
from fastapi.security import APIKeyHeader
app = FastAPI()

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API key security
API_KEY_NAME = "X-API-Key"
# Security scheme that reads the API key from the X-API-Key request header.
# auto_error=False is meant to let the endpoints produce their own 401 when
# the header is absent instead of FastAPI rejecting the request itself.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Constants
# Base URL of the Invidious instance used for search and video metadata.
# It can be overridden with the INVIDIOUS_INSTANCE environment variable; the
# previous hard-coded host stays the default. A trailing slash is dropped
# because the request URLs are built as f"{INVIDIOUS_INSTANCE}/api/v1/...".
INVIDIOUS_INSTANCE = (
    os.environ.get("INVIDIOUS_INSTANCE") or "https://iv.melmac.space"
).rstrip("/")

# The service refuses to start without a configured key: an unset or empty
# API_KEY aborts the import with ValueError.
API_KEY = os.environ.get("API_KEY")
if not API_KEY:
    raise ValueError("API_KEY environment variable is not set")
# Pool of browser User-Agent strings to rotate through. Kept as a module-level
# tuple so it is built once and cannot be mutated by accident.
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36",
)


def get_random_user_agent() -> str:
    """Return one of the known browser User-Agent strings, chosen at random.

    Returns:
        A User-Agent header value picked uniformly from ``_USER_AGENTS``.
    """
    return random.choice(_USER_AGENTS)
async def search_and_get_videos(query: str) -> List[Dict]:
    """Search the configured Invidious instance and summarise the top hits.

    Args:
        query: Free-text search phrase supplied by the API caller.

    Returns:
        A list of at most two dicts with the keys ``"id"``, ``"title"`` and
        ``"thumbnail"`` (empty string when a result has no thumbnail). An
        empty list is returned when the request fails, times out, or the
        response is not a JSON list.
    """
    url = f"{INVIDIOUS_INSTANCE}/api/v1/search"
    # Pass the query through ``params`` so aiohttp percent-encodes it. Building
    # the query string by hand let characters such as '&', '#' or spaces in
    # the user's input corrupt the URL or smuggle in extra parameters.
    params = {"q": query, "type": "video"}
    headers = {"User-Agent": get_random_user_agent()}

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, params=params, headers=headers) as response:
                response.raise_for_status()
                search_results = await response.json()
    except (aiohttp.ClientError, asyncio.TimeoutError, json.JSONDecodeError) as e:
        # Timeouts and malformed JSON are not aiohttp.ClientError subclasses;
        # treat them like any other failed search instead of letting them
        # escape to the caller.
        logger.error("Error performing video search: %s", e)
        return []

    if not isinstance(search_results, list):
        logger.error(
            "Error performing video search: unexpected response type %s",
            type(search_results).__name__,
        )
        return []

    videos = []
    # Only the first two results are returned, so only those are inspected.
    for video in search_results[:2]:
        thumbnails = video.get("videoThumbnails")
        videos.append(
            {
                "id": video.get("videoId"),
                "title": video.get("title"),
                "thumbnail": thumbnails[0].get("url", "") if thumbnails else "",
            }
        )
    return videos
def _select_audio_url(video_data: Dict) -> Optional[str]:
    """Return the URL of the first ``audio/mp4`` adaptive format that has one."""
    for fmt in video_data.get('adaptiveFormats') or []:
        if (fmt.get('type') or '').startswith('audio/mp4') and fmt.get('url'):
            return fmt['url']
    return None


def _write_audio_temp_file(audio_content: bytes) -> str:
    """Write audio bytes to a new ``.m4a`` temporary file and return its path."""
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.m4a')
    try:
        with temp_file:
            temp_file.write(audio_content)
    except OSError:
        # delete=False means nothing else will remove a half-written file.
        if os.path.exists(temp_file.name):
            os.unlink(temp_file.name)
        raise
    return temp_file.name


async def get_youtube_audio(video_id: str, max_retries: int = 3) -> Dict:
    """Download the audio track of a video via the Invidious instance.

    The video metadata is fetched from ``/api/v1/videos/<video_id>``, the
    first ``audio/mp4`` adaptive format with a URL is downloaded, and the
    bytes are stored in a temporary ``.m4a`` file. The file is NOT deleted
    here; the caller owns it.

    Args:
        video_id: Identifier of the video to fetch.
        max_retries: Maximum number of attempts. Failed attempts are followed
            by a linearly growing pause (1s, 2s, ...) before the next one.

    Returns:
        ``{'success': True, 'temp_file_path': <path>}`` on success, otherwise
        ``{'success': False, 'error': <message>}``. A video without a usable
        audio format is reported immediately without retrying.
    """
    # Escape the id so it cannot alter the request path or query string.
    url = f"{INVIDIOUS_INSTANCE}/api/v1/videos/{quote(video_id, safe='')}"
    last_error = "Failed to fetch audio after multiple attempts"

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    response.raise_for_status()
                    video_data = await response.json()

                audio_url = _select_audio_url(video_data)
                if not audio_url:
                    logger.warning("No suitable audio format found for video ID %s", video_id)
                    return {'success': False, 'error': "No suitable audio format found"}

                try:
                    async with session.get(audio_url) as audio_response:
                        # Without this check an HTTP error page would be
                        # saved and reported as a successful download.
                        audio_response.raise_for_status()
                        audio_content = await audio_response.read()
                except aiohttp.ServerDisconnectedError:
                    audio_content = None
                    last_error = "Max retries reached"
                    if is_last_attempt:
                        logger.error("Max retries reached for video ID %s", video_id)

            if audio_content is not None:
                return {
                    'success': True,
                    'temp_file_path': _write_audio_temp_file(audio_content),
                }
        except aiohttp.ClientError as e:
            logger.error("Network error fetching YouTube audio for video ID %s: %s", video_id, e)
            last_error = str(e) or type(e).__name__
        except json.JSONDecodeError as e:
            logger.error("Error decoding JSON response for video ID %s", video_id)
            last_error = str(e) or type(e).__name__
        except Exception as e:
            # Best-effort boundary: anything unexpected is logged and retried.
            logger.error("Unexpected error fetching YouTube audio for video ID %s: %s", video_id, e)
            last_error = str(e) or type(e).__name__

        # The error text is captured inside each handler because the ``as e``
        # name is unbound once the except clause ends.
        if not is_last_attempt:
            await asyncio.sleep(1 * (attempt + 1))

    return {'success': False, 'error': last_error}
def extract_text_from_document(file: UploadFile) -> dict:
    """Extract plain text from an uploaded document using textract.

    The upload is copied to a temporary file that keeps the original file
    extension, handed to ``textract.process``, and the result is decoded as
    UTF-8. The temporary file is always removed, whether or not extraction
    succeeds.

    Args:
        file: The uploaded document. Its ``filename`` supplies the extension
            for the temporary file; a missing filename yields no extension.

    Returns:
        ``{'success': True, 'extracted_text': <text>}`` on success, otherwise
        ``{'success': False, 'error': <message>}``. This function does not
        raise for extraction failures.
    """
    temp_file_path = None
    try:
        # filename may be None for some uploads; splitext(None) would raise.
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(file.file.read())
        text = textract.process(temp_file_path).decode('utf-8')
        return {
            'success': True,
            'extracted_text': text
        }
    except Exception as e:
        return {
            'success': False,
            'error': f"Error extracting text from document: {str(e)}"
        }
    finally:
        # Clean up on every path: previously a failure in textract or in the
        # UTF-8 decode skipped the unlink and leaked the temporary file.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError as cleanup_error:
                logger.warning(
                    "Could not remove temporary file %s: %s",
                    temp_file_path,
                    cleanup_error,
                )
@app.get("/search-videos/")
async def search_videos(query: str, api_key: str = Security(api_key_header)):
    """Search for videos matching ``query``.

    Args:
        query: Search phrase, taken from the ``query`` query parameter.
        api_key: Value of the ``X-API-Key`` request header, resolved through
            the module-level ``api_key_header`` security scheme. The scheme
            must be wrapped in ``Security``; used as a bare default value the
            header is never read.

    Returns:
        ``{"videos": [...]}`` on success, or a 404 JSON response when the
        search produced no results or failed.

    Raises:
        HTTPException: 401 when the API key is missing or does not match.
    """
    if api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API Key")
    videos = await search_and_get_videos(query)
    if not videos:
        return JSONResponse(
            status_code=404,
            content={"message": "No videos found or an error occurred during the search."}
        )
    return {"videos": videos}
@app.get("/get-audio/{video_id}")
async def get_audio(video_id: str, api_key: str = Security(api_key_header)):
    """Download the audio track for ``video_id`` to a server-side temp file.

    Args:
        video_id: Video identifier, taken from the URL path.
        api_key: Value of the ``X-API-Key`` request header, resolved through
            the module-level ``api_key_header`` security scheme. The scheme
            must be wrapped in ``Security``; used as a bare default value the
            header is never read.

    Returns:
        ``{"audio_file_path": <path>}`` on success, or a 404 JSON response
        carrying the error message when the download failed.

    Raises:
        HTTPException: 401 when the API key is missing or does not match.
    """
    if api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API Key")
    result = await get_youtube_audio(video_id)
    if not result['success']:
        return JSONResponse(
            status_code=404,
            content={"message": result['error']}
        )
    # NOTE(review): the response exposes a path on the server's filesystem and
    # nothing visible here ever deletes that file - confirm the consumer runs
    # on the same host and takes care of cleanup.
    return {"audio_file_path": result['temp_file_path']}
@app.post("/extract-text/")
async def extract_text(file: UploadFile, api_key: str = Security(api_key_header)):
    """Extract the text content of an uploaded document.

    Args:
        file: The uploaded document.
        api_key: Value of the ``X-API-Key`` request header, resolved through
            the module-level ``api_key_header`` security scheme. The scheme
            must be wrapped in ``Security``; used as a bare default value the
            header is never read.

    Returns:
        ``{"extracted_text": <text>}`` on success, or a 500 JSON response
        carrying the error message when extraction failed.

    Raises:
        HTTPException: 401 when the API key is missing or does not match.
    """
    if api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API Key")
    # NOTE(review): extract_text_from_document is synchronous, so this call
    # occupies the event loop for the duration of the extraction - consider
    # moving it to a worker thread if large documents are expected.
    result = extract_text_from_document(file)
    if not result['success']:
        return JSONResponse(
            status_code=500,
            content={"message": result['error']}
        )
    return {"extracted_text": result['extracted_text']}
|