Update app.py
app.py
CHANGED
@@ -47,16 +47,147 @@ if not all([API_KEY, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_BUCKET, S3_REGIO

 # User agents for requests
 USER_AGENTS = [
-
-
-
 ]

-def
-
-
-
-

 def upload_to_s3(local_file, s3_file):
     s3_client = boto3.client(
@@ -79,7 +210,220 @@ def upload_to_s3(local_file, s3_file):
         logger.error("Credentials not available")
         return None

-def
     try:
         logger.info(f"Starting YouTube audio extraction for video ID: {video_id}")

@@ -133,52 +477,11 @@ def get_youtube_audio(video_id):
                         logger.info(f"Downloaded {total_size / (1024 * 1024):.2f} MB")

             logger.info(f"Audio download completed. Total size: {total_size / (1024 * 1024):.2f} MB")
-
-            # Upload to S3
-            s3_file_name = f"{video_id}.mp3"
-            s3_url = upload_to_s3(temp_file_path, s3_file_name)
-            os.unlink(temp_file_path)  # Clean up the temporary file
-
-            if s3_url:
-                return {'success': True, 'audio_url': s3_url}
-            else:
-                return {'success': False, 'error': "Failed to upload audio to S3"}
         else:
             logger.error(f"Failed to download audio: HTTP {response.status_code}")
             return {'success': False, 'error': f"Download failed: HTTP {response.status_code}"}

     except Exception as e:
         logger.exception(f"Error fetching YouTube audio for video ID {video_id}: {str(e)}")
-        return {'success': False, 'error': f"Error: {str(e)}"}
-
-async def search_and_get_videos(query: str, num_videos: int = 2) -> List[Dict]:
-    for instance in INVIDIOUS_INSTANCES:
-        url = f"{instance}/api/v1/search?q={query}&type=video"
-        try:
-            async with aiohttp.ClientSession() as session:
-                async with session.get(url) as response:
-                    response.raise_for_status()
-                    search_results = await response.json()
-                    videos = [
-                        {
-                            "id": video.get("videoId"),
-                            "title": video.get("title"),
-                            "thumbnail": video["videoThumbnails"][0]["url"]
-                            if video.get("videoThumbnails")
-                            else "",
-                        }
-                        for video in search_results
-                    ][:num_videos]
-                    return videos
-        except aiohttp.ClientError as e:
-            logger.error(f"Error performing video search on {instance}: {e}")
-    logger.error("All Invidious instances failed")
-    return []
-
-@app.get("/get-audio/{video_id}")
-async def get_audio(video_id: str, api_key: str = Depends(verify_api_key)):
-    result = get_youtube_audio(video_id)
-    if not result['success']:
-        raise HTTPException(status_code=500, detail=result['error'])
-
-    return {"audio_url": result['audio_url']}
The updated file, with added lines marked "+":

 
 # User agents for requests
 USER_AGENTS = [
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
+    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
 ]
 
+async def search_and_get_videos(query: str, num_videos: int = 2) -> List[Dict]:
+    for instance in INVIDIOUS_INSTANCES:
+        url = f"{instance}/api/v1/search?q={query}&type=video"
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url) as response:
+                    response.raise_for_status()
+                    search_results = await response.json()
+                    videos = [
+                        {
+                            "id": video.get("videoId"),
+                            "title": video.get("title"),
+                            "thumbnail": video["videoThumbnails"][0]["url"]
+                            if video.get("videoThumbnails")
+                            else "",
+                        }
+                        for video in search_results
+                    ][:num_videos]
+                    return videos
+        except aiohttp.ClientError as e:
+            logger.error(f"Error performing video search on {instance}: {e}")
+    logger.error("All Invidious instances failed")
+    return []
+
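A quick way to sanity-check this helper outside the app is to drive it with asyncio directly. The sketch below is illustrative only: it assumes the INVIDIOUS_INSTANCES list defined earlier in app.py (not shown in this diff), and the query string is an arbitrary example.

import asyncio

# Illustrative only: INVIDIOUS_INSTANCES is defined earlier in app.py and is
# not shown in this diff; the query is an arbitrary example.
async def demo():
    videos = await search_and_get_videos("lo-fi study mix", num_videos=2)
    for v in videos:
        print(v["id"], "-", v["title"])

asyncio.run(demo())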
+def get_youtube_audio(video_id):
+    try:
+        logger.info(f"Starting YouTube audio extraction for video ID: {video_id}")
+
+        # Create an InnerTube client for iOS
+        client = InnerTube("IOS")
+        logger.info("InnerTube client created")
+
+        # Fetch video info
+        logger.info("Fetching video info")
+        video_info = client.player(video_id)
+        logger.info("Video info fetched successfully")
+
+        # Check if the video has streaming data
+        if 'streamingData' not in video_info:
+            logger.error(f"No 'streamingData' found in video info for video ID {video_id}")
+            return {'success': False, 'error': "No streaming data found for the video"}
+
+        # Extract the audio streams
+        streams = video_info["streamingData"]["adaptiveFormats"]
+        audio_streams = [s for s in streams if s['mimeType'].startswith('audio/')]
+
+        if not audio_streams:
+            logger.warning(f"No audio streams found for video ID {video_id}")
+            return {'success': False, 'error': "No audio streams found"}
+
+        # Choose the highest quality audio stream
+        audio_stream = max(audio_streams, key=lambda x: x.get('bitrate', 0))
+        audio_url = audio_stream['url']
+        logger.info(f"Selected audio stream URL: {audio_url[:100]}...")  # Log first 100 chars of URL
+
+        # Prepare headers
+        headers = {
+            'User-Agent': random.choice(USER_AGENTS),
+            'Accept': '*/*',
+            'Accept-Encoding': 'gzip, deflate, br',
+            'Range': 'bytes=0-',
+            'Connection': 'keep-alive',
+        }
+
+        # Download the audio
+        logger.info("Starting audio download")
+        response = requests.get(audio_url, headers=headers, stream=True)
+        logger.info(f"Download response status code: {response.status_code}")
+
+        if response.status_code in [200, 206]:  # 200 OK or 206 Partial Content
+            # Create a temporary file for the downloaded audio
+            with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as temp_file:
+                temp_file_path = temp_file.name
+                logger.info(f"Created temporary file: {temp_file_path}")
+                # Write the audio data to the file
+                total_size = 0
+                for chunk in response.iter_content(chunk_size=8192):
+                    temp_file.write(chunk)
+                    total_size += len(chunk)
+                    if total_size % (1024 * 1024) == 0:  # Log every 1MB
+                        logger.info(f"Downloaded {total_size / (1024 * 1024):.2f} MB")
+
+            logger.info(f"Audio download completed. Total size: {total_size / (1024 * 1024):.2f} MB")
+
+            # Convert the downloaded audio to MP3
+            mp3_temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
+            mp3_temp_file_path = mp3_temp_file.name
+            mp3_temp_file.close()
+
+            logger.info(f"Converting audio to MP3: {mp3_temp_file_path}")
+
+            try:
+                (
+                    ffmpeg
+                    .input(temp_file_path)
+                    .output(mp3_temp_file_path, acodec='libmp3lame', audio_bitrate='128k')
+                    .overwrite_output()
+                    .run(capture_stdout=True, capture_stderr=True)
+                )
+                logger.info("Audio conversion to MP3 completed successfully")
+            except ffmpeg.Error as e:
+                logger.error(f"Error during audio conversion: {e.stderr.decode()}")
+                return {'success': False, 'error': "Failed to convert audio to MP3"}
+            finally:
+                # Remove the original downloaded file
+                os.unlink(temp_file_path)
+
+            return {'success': True, 'temp_file_path': mp3_temp_file_path}
+        else:
+            logger.error(f"Failed to download audio: HTTP {response.status_code}")
+            return {'success': False, 'error': f"Download failed: HTTP {response.status_code}"}
+
+    except Exception as e:
+        logger.exception(f"Error fetching YouTube audio for video ID {video_id}: {str(e)}")
+        return {'success': False, 'error': f"Error: {str(e)}"}
+
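The important change here is the contract: get_youtube_audio no longer uploads anything; it returns a local MP3 path and leaves upload and cleanup to the caller. A minimal sketch of that caller-side contract (the video ID is a placeholder):

# Sketch of the new caller-side contract; "dQw4w9WgXcQ" is a placeholder ID.
result = get_youtube_audio("dQw4w9WgXcQ")
if result['success']:
    mp3_path = result['temp_file_path']
    try:
        print("MP3 written to", mp3_path)  # e.g. hand the file to upload_to_s3()
    finally:
        os.unlink(mp3_path)  # cleanup is now the caller's job
else:
    print("extraction failed:", result['error'])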
+def extract_text_from_document(file: UploadFile) -> dict:
+    try:
+        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(file.filename)[1]) as temp_file:
+            content = file.file.read()
+            temp_file.write(content)
+            temp_file_path = temp_file.name
+
+        text = textract.process(temp_file_path).decode('utf-8')
+
+        os.unlink(temp_file_path)
+
+        return {
+            'success': True,
+            'extracted_text': text
+        }
+    except Exception as e:
+        return {
+            'success': False,
+            'error': f"Error extracting text from document: {str(e)}"
+        }
 
 def upload_to_s3(local_file, s3_file):
     s3_client = boto3.client(
⋮
         logger.error("Credentials not available")
         return None
 
+def image_search(query: str, num_results: int = 5) -> dict:
+    try:
+        with DDGS(
+            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
+        ) as ddgs:
+            results = list(ddgs.images(query, max_results=num_results))
+            formatted_results = [
+                {
+                    'title': result['title'],
+                    'image_url': result['image'],
+                    'thumbnail_url': result['thumbnail'],
+                    'source_url': result['url'],
+                    'width': result['width'],
+                    'height': result['height']
+                }
+                for result in results
+            ]
+            return {
+                'success': True,
+                'results': formatted_results
+            }
+    except Exception as e:
+        logger.error(f"Error performing image search: {e}")
+        return {
+            'success': False,
+            'error': f"Error performing image search: {str(e)}"
+        }
+
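For reference, a hedged sketch of how this helper is called; the keys mirror the formatting code above, while the query and printed values are invented for illustration.

# Illustrative call only; the query is an arbitrary example.
res = image_search("golden gate bridge", num_results=3)
if res['success']:
    for item in res['results']:
        print(item['title'], item['image_url'], f"{item['width']}x{item['height']}")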
+async def verify_api_key(api_key: str = Depends(api_key_header)):
+    if api_key != API_KEY:
+        raise HTTPException(status_code=401, detail="Invalid API Key")
+    return api_key
+
+def download_stream(url, output_path):
+    headers = {
+        'User-Agent': random.choice(USER_AGENTS),
+        'Accept': '*/*',
+        'Accept-Encoding': 'gzip, deflate, br',
+        'Range': 'bytes=0-',
+        'Connection': 'keep-alive',
+    }
+
+    with requests.get(url, headers=headers, stream=True) as r:
+        r.raise_for_status()
+        with open(output_path, 'wb') as f:
+            for chunk in r.iter_content(chunk_size=8192):
+                f.write(chunk)
+
+def get_best_video_stream(streams):
+    video_streams = [s for s in streams if s['mimeType'].startswith('video/')]
+
+    # Try to get 720p or the highest quality below 720p
+    preferred_stream = next((s for s in video_streams if s['qualityLabel'] == '720p'), None)
+    if not preferred_stream:
+        # If 720p is not available, get the highest quality below 720p
+        below_720p = [s for s in video_streams if int(s['qualityLabel'][:-1]) < 720]
+        preferred_stream = max(below_720p, key=lambda x: int(x['qualityLabel'][:-1])) if below_720p else None
+
+    return preferred_stream
+
+def get_best_audio_stream(streams):
+    audio_streams = [s for s in streams if s['mimeType'].startswith('audio/')]
+    return max(audio_streams, key=lambda x: x['bitrate']) if audio_streams else None
+
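The 720p-or-lower fallback can be exercised with hand-made format entries. The dicts below are hypothetical and trimmed to the fields the pickers actually read; note that real qualityLabel variants such as "1080p60" would break the int(label[:-1]) parse.

# Hypothetical adaptiveFormats entries, trimmed to the fields the pickers read.
sample_streams = [
    {'mimeType': 'video/mp4; codecs="avc1.64001F"', 'qualityLabel': '1080p', 'bitrate': 4_000_000},
    {'mimeType': 'video/mp4; codecs="avc1.4D401E"', 'qualityLabel': '480p', 'bitrate': 1_000_000},
    {'mimeType': 'audio/mp4; codecs="mp4a.40.2"', 'bitrate': 128_000},
]
print(get_best_video_stream(sample_streams)['qualityLabel'])  # '480p': no 720p present, so best below 720p
print(get_best_audio_stream(sample_streams)['bitrate'])       # 128000: the highest-bitrate audio entry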
+@app.get("/search-videos/")
+async def search_videos(
+    query: str,
+    num_videos: int = Query(default=2, ge=1, le=10),
+    api_key: str = Depends(verify_api_key)
+):
+    videos = await search_and_get_videos(query, num_videos)
+    if not videos:
+        raise HTTPException(status_code=404, detail="No videos found or an error occurred during the search.")
+    return {"videos": videos}
+
+@app.get("/get-audio/{video_id}")
+async def get_audio(video_id: str, api_key: str = Depends(verify_api_key)):
+    result = get_youtube_audio(video_id)
+    if not result['success']:
+        raise HTTPException(status_code=500, detail=result['error'])
+
+    s3_file_name = f"{video_id}.mp3"
+    s3_url = upload_to_s3(result['temp_file_path'], s3_file_name)
+
+    if s3_url:
+        os.unlink(result['temp_file_path'])  # Remove the temporary file
+        return {"audio_url": s3_url}
+    else:
+        raise HTTPException(status_code=500, detail="Failed to upload audio to S3")
+
+@app.post("/get-video-streams")
+async def get_video_streams(video_id: str, api_key: str = Depends(verify_api_key)):
+    try:
+        # Create an InnerTube client
+        client = InnerTube("WEB")
+
+        # Fetch video info
+        video_info = client.player(video_id)
+
+        # Get the best video and audio streams
+        streams = video_info["streamingData"]["adaptiveFormats"]
+        video_stream = get_best_video_stream(streams)
+        audio_stream = get_best_audio_stream(streams)
+
+        if not video_stream or not audio_stream:
+            raise HTTPException(status_code=404, detail="Could not find suitable video or audio stream")
+
+        # Download video and audio
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as video_file, \
+             tempfile.NamedTemporaryFile(delete=False, suffix='.m4a') as audio_file:
+
+            download_stream(video_stream['url'], video_file.name)
+            download_stream(audio_stream['url'], audio_file.name)
+
+        # Combine video and audio
+        output_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
+
+        try:
+            (
+                ffmpeg
+                .input(video_file.name)
+                .input(audio_file.name)
+                .output(output_file.name, vcodec='libx264', acodec='aac')
+                .overwrite_output()
+                .run(capture_stdout=True, capture_stderr=True)
+            )
+        except ffmpeg.Error as e:
+            raise HTTPException(status_code=500, detail=f"Error combining video and audio: {e.stderr.decode()}")
+
+        # Upload combined video to S3
+        s3_file_name = f"{video_id}_combined.mp4"
+        s3_url = upload_to_s3(output_file.name, s3_file_name)
+
+        if s3_url:
+            return {"video_url": s3_url}
+        else:
+            raise HTTPException(status_code=500, detail="Failed to upload video to S3")
+
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error processing video: {str(e)}")
+    finally:
+        # Clean up temporary files
+        for file in [video_file.name, audio_file.name, output_file.name]:
+            if os.path.exists(file):
+                os.unlink(file)
+
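One caveat on the mux step: with the ffmpeg-python package, chaining .input() off an existing input stream is not part of the documented API, so the block above would likely fail at runtime. A sketch of the usual two-input form, under that assumption:

# Sketch of a two-input mux with ffmpeg-python: build both inputs first,
# then pass them to output(). Variable names follow the handler above.
video_in = ffmpeg.input(video_file.name)
audio_in = ffmpeg.input(audio_file.name)
(
    ffmpeg
    .output(video_in, audio_in, output_file.name, vcodec='copy', acodec='copy')
    .overwrite_output()
    .run(capture_stdout=True, capture_stderr=True)
)

Stream copy (vcodec='copy') avoids a full re-encode when the chosen formats are MP4-compatible; otherwise the libx264/aac transcode used above is the safer default.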
+@app.post("/extract-text/")
+async def extract_text(file: UploadFile, api_key: str = Depends(verify_api_key)):
+    result = extract_text_from_document(file)
+    if not result['success']:
+        raise HTTPException(status_code=500, detail=result['error'])
+    return {"extracted_text": result['extracted_text']}
+
+@app.get("/image-search/")
+async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
+    result = image_search(query, num_results)
+    if not result['success']:
+        raise HTTPException(status_code=500, detail=result['error'])
+    return result
+
+class DuckDuckGoSearch:
+    async def search(self, query: str, num_results: int = 5) -> list:
+        url = f"https://html.duckduckgo.com/html/?q={query}"
+        headers = {
+            "User-Agent": "Mozilla/5.0",
+            "Referer": "https://google.com/",
+            "Cookie": "kl=wt-wt",
+        }
+
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers) as response:
+                if response.status != 200:
+                    raise Exception("Failed to fetch data from DuckDuckGo")
+
+                html = await response.text()
+                soup = BeautifulSoup(html, "html.parser")
+                results = []
+
+                for result in soup.select(".result"):
+                    title = result.select_one(".result__title .result__a")
+                    url = result.select_one(".result__url")
+                    desc = result.select_one(".result__snippet")
+
+                    if title and url and desc:
+                        results.append({
+                            "title": title.get_text(strip=True),
+                            "body": desc.get_text(strip=True),
+                            "href": f"https://{url.get_text(strip=True)}",
+                        })
+
+                    if len(results) >= num_results:
+                        break
+
+        return results
+
+async def web_search(query: str, num_results: int = 5) -> dict:
+    try:
+        results = await DuckDuckGoSearch().search(query, num_results)
+        return {
+            'success': True,
+            'results': results
+        }
+    except Exception as e:
+        return {
+            'success': False,
+            'error': str(e)
+        }
+
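Like the video-search helper, this scraper is plain asyncio and can be exercised standalone; the query here is an arbitrary example.

import asyncio

# Illustrative only; prints title/href pairs scraped from the DuckDuckGo HTML page.
hits = asyncio.run(web_search("fastapi background tasks", num_results=3))
if hits['success']:
    for h in hits['results']:
        print(h['title'], "->", h['href'])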
+@app.get("/web-search/")
+async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
+    result = await web_search(query, num_results)
+    if not result['success']:
+        raise HTTPException(status_code=500, detail=result['error'])
+    return result, remplace get youtube audio by this def get_youtube_audio(video_id):
     try:
         logger.info(f"Starting YouTube audio extraction for video ID: {video_id}")

⋮
                         logger.info(f"Downloaded {total_size / (1024 * 1024):.2f} MB")

             logger.info(f"Audio download completed. Total size: {total_size / (1024 * 1024):.2f} MB")
+            return {'success': True, 'temp_file_path': temp_file_path}
         else:
             logger.error(f"Failed to download audio: HTTP {response.status_code}")
             return {'success': False, 'error': f"Download failed: HTTP {response.status_code}"}

     except Exception as e:
         logger.exception(f"Error fetching YouTube audio for video ID {video_id}: {str(e)}")
+        return {'success': False, 'error': f"Error: {str(e)}"} , add ulpoad to s3 funtion and send the s3 file