File size: 6,883 Bytes
aa732b3
19cbc25
 
 
 
 
 
 
 
 
 
 
 
aa732b3
 
259d504
19cbc25
 
 
259d504
19cbc25
 
 
 
 
 
aa732b3
 
19cbc25
 
53afe5a
19cbc25
 
 
 
 
 
 
259d504
19cbc25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259d504
19cbc25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259d504
19cbc25
aa732b3
19cbc25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259d504
19cbc25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import asyncio
import json
import logging
import os
import random
import tempfile
from typing import List, Dict

import aiohttp
import requests
import textract
from fastapi import Depends, FastAPI, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from fastapi.security import APIKeyHeader

# ASGI application object; the route decorators further down register on it.
app = FastAPI()

# Configure logging
# basicConfig runs at import time and sets the root logger to INFO.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# API key security
# Name of the HTTP header that is expected to carry the client's API key.
API_KEY_NAME = "X-API-Key"
# Header-based API-key scheme. Per the FastAPI docs, auto_error=False makes a
# missing header resolve to None instead of FastAPI rejecting the request
# itself, leaving the rejection to whoever consumes this scheme.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)

# Constants
# Base URL of the Invidious instance used for search and video metadata calls.
INVIDIOUS_INSTANCE = "https://iv.melmac.space"
# Expected API key, read once from the environment when the module is imported.
API_KEY = os.environ.get("API_KEY")

# Fail fast at import time: an unset or empty API_KEY aborts startup rather
# than letting the service run without a key to compare requests against.
if not API_KEY:
    raise ValueError("API_KEY environment variable is not set")

def get_random_user_agent() -> str:
    """Return one browser User-Agent string chosen at random from a fixed pool.

    The pool holds three desktop browser signatures (Chrome on Windows,
    Safari on macOS, Chrome on Linux).
    """
    chrome_on_windows = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    safari_on_macos = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15"
    chrome_on_linux = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36"

    pool = [chrome_on_windows, safari_on_macos, chrome_on_linux]
    return random.choice(pool)

async def search_and_get_videos(query: str) -> List[Dict]:
    """Search the configured Invidious instance and return the first two hits.

    Args:
        query: Free-text search string. It is passed through ``params`` so the
            HTTP client URL-encodes it; characters such as ``&``, ``#`` or
            ``+`` therefore stay part of the search term instead of altering
            the request URL.

    Returns:
        A list of at most two dicts with the keys ``id``, ``title`` and
        ``thumbnail`` (URL of the first thumbnail, or ``""`` when the result
        has none). An empty list is returned when the request fails, the body
        is not valid JSON, or the payload is not a JSON array.
    """
    url = f"{INVIDIOUS_INSTANCE}/api/v1/search"
    params = {"q": query, "type": "video"}
    headers = {"User-Agent": get_random_user_agent()}

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, params=params, headers=headers) as response:
                response.raise_for_status()
                search_results = await response.json()
    except (aiohttp.ClientError, json.JSONDecodeError) as e:
        logger.error("Error performing video search: %s", e)
        return []

    # An error payload may be a JSON object rather than the expected array;
    # iterating it would yield strings and crash on ``.get`` below.
    if not isinstance(search_results, list):
        logger.error("Unexpected search response format: %s", type(search_results).__name__)
        return []

    videos = []
    # Only the first two results are returned, so only those are processed.
    for video in search_results[:2]:
        thumbnails = video.get("videoThumbnails")
        videos.append(
            {
                "id": video.get("videoId"),
                "title": video.get("title"),
                "thumbnail": thumbnails[0].get("url", "") if thumbnails else "",
            }
        )
    return videos

async def _stream_response_to_temp_file(audio_response, suffix: str = '.m4a') -> str:
    """Write an aiohttp response body to a new temp file in chunks; return its path.

    The file is created with ``delete=False`` so it outlives this call. If the
    transfer is interrupted, the partial file is removed before the error
    propagates to the caller.
    """
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
    completed = False
    try:
        with temp_file:
            # Stream instead of buffering the whole audio track in memory.
            async for chunk in audio_response.content.iter_chunked(64 * 1024):
                temp_file.write(chunk)
        completed = True
    finally:
        if not completed:
            try:
                os.unlink(temp_file.name)
            except OSError:
                logger.warning("Could not remove partial temp file %s", temp_file.name)
    return temp_file.name


async def get_youtube_audio(video_id: str, max_retries: int = 3) -> Dict:
    """Download the first ``audio/mp4`` stream of a video into a temp file.

    Video metadata is fetched from the configured Invidious instance, the
    first adaptive format whose ``type`` starts with ``audio/mp4`` is selected,
    and its URL is downloaded to a ``.m4a`` temp file. Failed attempts are
    retried with a linearly growing delay (1s, 2s, ...).

    Args:
        video_id: Identifier of the video to fetch.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        ``{'success': True, 'temp_file_path': <path>}`` on success. The file is
        not deleted here; removing it is left to the caller.
        ``{'success': False, 'error': <message>}`` when no suitable audio
        format exists or all attempts fail.
    """
    url = f"{INVIDIOUS_INSTANCE}/api/v1/videos/{video_id}"

    for attempt in range(max_retries):
        is_last_attempt = attempt == max_retries - 1
        try:
            async with aiohttp.ClientSession() as session:
                async with session.get(url) as response:
                    response.raise_for_status()
                    video_data = await response.json()

                audio_format = next(
                    (
                        fmt
                        for fmt in video_data.get('adaptiveFormats', [])
                        if fmt.get('type', '').startswith('audio/mp4')
                    ),
                    None,
                )
                audio_url = audio_format.get('url') if audio_format else None
                if not audio_url:
                    # Retrying cannot help here: the metadata itself has no usable stream.
                    logger.warning(f"No suitable audio format found for video ID {video_id}")
                    return {'success': False, 'error': "No suitable audio format found"}

                try:
                    async with session.get(audio_url) as audio_response:
                        # Without this check an HTTP error page would be saved
                        # as the audio file and reported as a success.
                        audio_response.raise_for_status()
                        temp_file_path = await _stream_response_to_temp_file(audio_response)
                    return {'success': True, 'temp_file_path': temp_file_path}
                except aiohttp.ServerDisconnectedError:
                    if is_last_attempt:
                        logger.error(f"Max retries reached for video ID {video_id}")
                        return {'success': False, 'error': "Max retries reached"}
                    # Otherwise fall through to the shared backoff below and retry.
        except aiohttp.ClientError as e:
            logger.error(f"Network error fetching YouTube audio for video ID {video_id}: {e}")
        except json.JSONDecodeError:
            logger.error(f"Error decoding JSON response for video ID {video_id}")
        except Exception as e:
            logger.error(f"Unexpected error fetching YouTube audio for video ID {video_id}: {e}")
            if is_last_attempt:
                return {'success': False, 'error': str(e)}

        # Back off before the next attempt; no point sleeping after the last one.
        if not is_last_attempt:
            await asyncio.sleep(1 * (attempt + 1))

    return {'success': False, 'error': "Failed to fetch audio after multiple attempts"}

def extract_text_from_document(file: UploadFile) -> dict:
    """Extract plain text from an uploaded document using textract.

    The upload is copied to a temp file that keeps the original file
    extension, the temp file is handed to ``textract.process``, and the
    resulting bytes are decoded as UTF-8. The temp file is removed afterwards
    whether or not extraction succeeded.

    Args:
        file: The uploaded document. Its ``filename`` supplies the temp file
            suffix; a missing filename results in no suffix.

    Returns:
        ``{'success': True, 'extracted_text': <text>}`` on success, otherwise
        ``{'success': False, 'error': <message>}``. Exceptions are reported in
        the returned dict rather than raised.
    """
    temp_file_path = None
    try:
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(file.file.read())

        text = textract.process(temp_file_path).decode('utf-8')

        return {
            'success': True,
            'extracted_text': text
        }
    except Exception as e:
        return {
            'success': False,
            'error': f"Error extracting text from document: {str(e)}"
        }
    finally:
        # Clean up on every path; previously a failed extraction leaked the file.
        if temp_file_path is not None:
            try:
                os.unlink(temp_file_path)
            except OSError:
                logger.warning("Could not remove temp file %s", temp_file_path)

@app.get("/search-videos/")
async def search_videos(query: str, api_key: str = Depends(api_key_header)):
    """Search for videos matching ``query``.

    Args:
        query: Search string, taken from the ``query`` query parameter.
        api_key: Value of the ``X-API-Key`` request header, resolved through
            the ``api_key_header`` dependency. It must be wrapped in
            ``Depends``; a bare ``APIKeyHeader`` default would make FastAPI
            treat ``api_key`` as an ordinary query parameter and the header
            would never be read.

    Returns:
        ``{"videos": [...]}`` on success, or a 404 JSON response when the
        search produced no results or failed.

    Raises:
        HTTPException: 401 when the API key is missing or does not match.
    """
    # A missing header yields None (auto_error=False), which also fails here.
    if api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API Key")

    videos = await search_and_get_videos(query)
    if not videos:
        return JSONResponse(
            status_code=404,
            content={"message": "No videos found or an error occurred during the search."}
        )
    return {"videos": videos}

@app.get("/get-audio/{video_id}")
async def get_audio(video_id: str, api_key: str = Depends(api_key_header)):
    """Download the audio track of a video and report where it was stored.

    Args:
        video_id: Video identifier taken from the URL path.
        api_key: Value of the ``X-API-Key`` request header, resolved through
            the ``api_key_header`` dependency. It must be wrapped in
            ``Depends``; a bare ``APIKeyHeader`` default would make FastAPI
            treat ``api_key`` as an ordinary query parameter and the header
            would never be read.

    Returns:
        ``{"audio_file_path": <path>}`` with the server-side temp file path on
        success, or a 404 JSON response carrying the failure message.

    Raises:
        HTTPException: 401 when the API key is missing or does not match.
    """
    # A missing header yields None (auto_error=False), which also fails here.
    if api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API Key")

    result = await get_youtube_audio(video_id)
    if not result['success']:
        return JSONResponse(
            status_code=404,
            content={"message": result['error']}
        )
    return {"audio_file_path": result['temp_file_path']}

@app.post("/extract-text/")
async def extract_text(file: UploadFile, api_key: str = Depends(api_key_header)):
    """Extract the text content of an uploaded document.

    Args:
        file: The uploaded document.
        api_key: Value of the ``X-API-Key`` request header, resolved through
            the ``api_key_header`` dependency. It must be wrapped in
            ``Depends``; a bare ``APIKeyHeader`` default would make FastAPI
            treat ``api_key`` as an ordinary query parameter and the header
            would never be read.

    Returns:
        ``{"extracted_text": <text>}`` on success, or a 500 JSON response
        carrying the extraction error message.

    Raises:
        HTTPException: 401 when the API key is missing or does not match.
    """
    # A missing header yields None (auto_error=False), which also fails here.
    if api_key != API_KEY:
        raise HTTPException(status_code=401, detail="Invalid API Key")

    result = extract_text_from_document(file)
    if not result['success']:
        return JSONResponse(
            status_code=500,
            content={"message": result['error']}
        )
    return {"extracted_text": result['extracted_text']}