Edmond7 committed on
Commit
0e5ff0f
·
verified ·
1 Parent(s): 093c9c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +355 -52
app.py CHANGED
@@ -47,16 +47,147 @@ if not all([API_KEY, S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_BUCKET, S3_REGIO
47
 
48
  # User agents for requests
49
  USER_AGENTS = [
50
- 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
51
- 'Mozilla/5.0 (iPad; CPU OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1',
52
- 'Mozilla/5.0 (iPod touch; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1'
53
  ]
54
 
55
- def verify_api_key(api_key: str = Depends(api_key_header)):
56
- if api_key == API_KEY:
57
- return api_key
58
- else:
59
- raise HTTPException(status_code=403, detail="Could not validate API key")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  def upload_to_s3(local_file, s3_file):
62
  s3_client = boto3.client(
@@ -79,7 +210,220 @@ def upload_to_s3(local_file, s3_file):
79
  logger.error("Credentials not available")
80
  return None
81
 
82
- def get_youtube_audio(video_id):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  try:
84
  logger.info(f"Starting YouTube audio extraction for video ID: {video_id}")
85
 
@@ -133,52 +477,11 @@ def get_youtube_audio(video_id):
133
  logger.info(f"Downloaded {total_size / (1024 * 1024):.2f} MB")
134
 
135
  logger.info(f"Audio download completed. Total size: {total_size / (1024 * 1024):.2f} MB")
136
-
137
- # Upload to S3
138
- s3_file_name = f"{video_id}.mp3"
139
- s3_url = upload_to_s3(temp_file_path, s3_file_name)
140
- os.unlink(temp_file_path) # Clean up the temporary file
141
-
142
- if s3_url:
143
- return {'success': True, 'audio_url': s3_url}
144
- else:
145
- return {'success': False, 'error': "Failed to upload audio to S3"}
146
  else:
147
  logger.error(f"Failed to download audio: HTTP {response.status_code}")
148
  return {'success': False, 'error': f"Download failed: HTTP {response.status_code}"}
149
 
150
  except Exception as e:
151
  logger.exception(f"Error fetching YouTube audio for video ID {video_id}: {str(e)}")
152
- return {'success': False, 'error': f"Error: {str(e)}"}
153
-
154
- async def search_and_get_videos(query: str, num_videos: int = 2) -> List[Dict]:
155
- for instance in INVIDIOUS_INSTANCES:
156
- url = f"{instance}/api/v1/search?q={query}&type=video"
157
- try:
158
- async with aiohttp.ClientSession() as session:
159
- async with session.get(url) as response:
160
- response.raise_for_status()
161
- search_results = await response.json()
162
- videos = [
163
- {
164
- "id": video.get("videoId"),
165
- "title": video.get("title"),
166
- "thumbnail": video["videoThumbnails"][0]["url"]
167
- if video.get("videoThumbnails")
168
- else "",
169
- }
170
- for video in search_results
171
- ][:num_videos]
172
- return videos
173
- except aiohttp.ClientError as e:
174
- logger.error(f"Error performing video search on {instance}: {e}")
175
- logger.error("All Invidious instances failed")
176
- return []
177
-
178
- @app.get("/get-audio/{video_id}")
179
- async def get_audio(video_id: str, api_key: str = Depends(verify_api_key)):
180
- result = get_youtube_audio(video_id)
181
- if not result['success']:
182
- raise HTTPException(status_code=500, detail=result['error'])
183
-
184
- return {"audio_url": result['audio_url']}
 
47
 
48
  # User agents for requests
49
  USER_AGENTS = [
50
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
51
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
52
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
53
  ]
54
 
55
async def search_and_get_videos(query: str, num_videos: int = 2) -> List[Dict]:
    """Search the configured Invidious instances for videos matching ``query``.

    Instances from ``INVIDIOUS_INSTANCES`` are tried in order; the first one
    that answers with a usable result list supplies the return value.

    Args:
        query: Free-text search string. It is sent as a request parameter so
            that spaces, ``&``, ``#`` and non-ASCII characters are URL-encoded
            instead of corrupting the request URL.
        num_videos: Maximum number of videos to return.

    Returns:
        A list of dicts with ``id``, ``title`` and ``thumbnail`` keys, or an
        empty list when every instance failed.
    """
    import asyncio  # local import: only needed for the timeout exception type

    params = {"q": query, "type": "video"}
    # Bound each attempt so one unresponsive instance cannot stall the fallback.
    timeout = aiohttp.ClientTimeout(total=15)

    async with aiohttp.ClientSession(timeout=timeout) as session:
        for instance in INVIDIOUS_INSTANCES:
            url = f"{instance}/api/v1/search"
            try:
                async with session.get(url, params=params) as response:
                    response.raise_for_status()
                    search_results = await response.json()
            except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as e:
                # ValueError covers a body that is not valid JSON.
                logger.error(f"Error performing video search on {instance}: {e}")
                continue

            if not isinstance(search_results, list):
                # Anything other than a list cannot be a list of videos.
                logger.error(f"Error performing video search on {instance}: unexpected response format")
                continue

            return [
                {
                    "id": video.get("videoId"),
                    "title": video.get("title"),
                    "thumbnail": video["videoThumbnails"][0].get("url", "")
                    if video.get("videoThumbnails")
                    else "",
                }
                for video in search_results[:num_videos]
            ]

    logger.error("All Invidious instances failed")
    return []
78
+
79
def get_youtube_audio(video_id):
    """Download the highest-bitrate audio stream of a video and convert it to MP3.

    Args:
        video_id: Video identifier passed to the InnerTube player API.

    Returns:
        ``{'success': True, 'temp_file_path': <path of the MP3>}`` on success;
        the caller owns that file and is responsible for deleting it.
        ``{'success': False, 'error': <message>}`` on any failure, in which
        case no temporary file is left behind.
    """
    def _remove(path):
        # Delete a temporary file if it was created and still exists.
        if path and os.path.exists(path):
            os.unlink(path)

    one_mb = 1024 * 1024
    temp_file_path = None
    mp3_temp_file_path = None
    succeeded = False

    try:
        logger.info(f"Starting YouTube audio extraction for video ID: {video_id}")

        # Create an InnerTube client for iOS
        client = InnerTube("IOS")
        logger.info("InnerTube client created")

        # Fetch video info
        logger.info("Fetching video info")
        video_info = client.player(video_id)
        logger.info("Video info fetched successfully")

        # Check if the video has streaming data
        if 'streamingData' not in video_info:
            logger.error(f"No 'streamingData' found in video info for video ID {video_id}")
            return {'success': False, 'error': "No streaming data found for the video"}

        # Extract the audio streams; tolerate entries without the expected keys
        streams = video_info["streamingData"].get("adaptiveFormats", [])
        audio_streams = [s for s in streams if s.get('mimeType', '').startswith('audio/')]

        if not audio_streams:
            logger.warning(f"No audio streams found for video ID {video_id}")
            return {'success': False, 'error': "No audio streams found"}

        # Choose the highest quality audio stream
        audio_stream = max(audio_streams, key=lambda x: x.get('bitrate', 0))
        audio_url = audio_stream['url']
        logger.info(f"Selected audio stream URL: {audio_url[:100]}...")  # Log first 100 chars of URL

        # Prepare headers
        headers = {
            'User-Agent': random.choice(USER_AGENTS),
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Range': 'bytes=0-',
            'Connection': 'keep-alive',
        }

        # Download the audio. The context manager releases the connection, and
        # the (connect, read) timeout keeps a stalled server from hanging us.
        logger.info("Starting audio download")
        with requests.get(audio_url, headers=headers, stream=True, timeout=(10, 60)) as response:
            logger.info(f"Download response status code: {response.status_code}")

            if response.status_code not in (200, 206):  # 200 OK or 206 Partial Content
                logger.error(f"Failed to download audio: HTTP {response.status_code}")
                return {'success': False, 'error': f"Download failed: HTTP {response.status_code}"}

            # Create a temporary file for the downloaded audio
            with tempfile.NamedTemporaryFile(delete=False, suffix='.webm') as temp_file:
                temp_file_path = temp_file.name
                logger.info(f"Created temporary file: {temp_file_path}")

                total_size = 0
                next_log_at = one_mb
                for chunk in response.iter_content(chunk_size=8192):
                    if not chunk:
                        continue
                    temp_file.write(chunk)
                    total_size += len(chunk)
                    # Threshold-based so the progress log fires even when the
                    # running total never lands on an exact multiple of 1 MB.
                    if total_size >= next_log_at:
                        logger.info(f"Downloaded {total_size / (1024 * 1024):.2f} MB")
                        next_log_at += one_mb

        logger.info(f"Audio download completed. Total size: {total_size / (1024 * 1024):.2f} MB")

        # Reserve a path for the MP3; ffmpeg writes to it by name.
        mp3_temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp3')
        mp3_temp_file_path = mp3_temp_file.name
        mp3_temp_file.close()

        logger.info(f"Converting audio to MP3: {mp3_temp_file_path}")

        try:
            (
                ffmpeg
                .input(temp_file_path)
                .output(mp3_temp_file_path, acodec='libmp3lame', audio_bitrate='128k')
                .overwrite_output()
                .run(capture_stdout=True, capture_stderr=True)
            )
        except ffmpeg.Error as e:
            stderr_text = e.stderr.decode(errors='replace') if e.stderr else str(e)
            logger.error(f"Error during audio conversion: {stderr_text}")
            return {'success': False, 'error': "Failed to convert audio to MP3"}

        logger.info("Audio conversion to MP3 completed successfully")
        succeeded = True
        return {'success': True, 'temp_file_path': mp3_temp_file_path}

    except Exception as e:
        logger.exception(f"Error fetching YouTube audio for video ID {video_id}: {str(e)}")
        return {'success': False, 'error': f"Error: {str(e)}"}
    finally:
        # The raw download is never needed after this function returns; the
        # MP3 is kept only when it is being handed to the caller.
        _remove(temp_file_path)
        if not succeeded:
            _remove(mp3_temp_file_path)
170
+
171
def extract_text_from_document(file: UploadFile) -> dict:
    """Extract plain text from an uploaded document using ``textract``.

    The upload is copied to a temporary file that keeps the original
    extension, processed, and the temporary file is always removed afterwards.

    Args:
        file: The uploaded document.

    Returns:
        ``{'success': True, 'extracted_text': <text>}`` on success, otherwise
        ``{'success': False, 'error': <message>}``.
    """
    temp_file_path = None
    try:
        # An upload may arrive without a filename; fall back to no suffix.
        suffix = os.path.splitext(file.filename or "")[1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(file.file.read())

        text = textract.process(temp_file_path).decode('utf-8')

        return {
            'success': True,
            'extracted_text': text
        }
    except Exception as e:
        return {
            'success': False,
            'error': f"Error extracting text from document: {str(e)}"
        }
    finally:
        # Runs on both paths, so a failed extraction no longer leaks the file.
        if temp_file_path and os.path.exists(temp_file_path):
            os.unlink(temp_file_path)
191
 
192
  def upload_to_s3(local_file, s3_file):
193
  s3_client = boto3.client(
 
210
  logger.error("Credentials not available")
211
  return None
212
 
213
def image_search(query: str, num_results: int = 5) -> dict:
    """Run an image search through ``DDGS`` and normalise the result fields.

    Args:
        query: Search string.
        num_results: Maximum number of results requested from the backend.

    Returns:
        ``{'success': True, 'results': [...]}`` where each entry has the keys
        ``title``, ``image_url``, ``thumbnail_url``, ``source_url``, ``width``
        and ``height`` (``None`` for any field the backend did not supply), or
        ``{'success': False, 'error': <message>}`` when the search fails.
    """
    # (output key, key in the backend result)
    field_map = (
        ('title', 'title'),
        ('image_url', 'image'),
        ('thumbnail_url', 'thumbnail'),
        ('source_url', 'url'),
        ('width', 'width'),
        ('height', 'height'),
    )
    try:
        with DDGS(
            headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'}
        ) as ddgs:
            results = list(ddgs.images(query, max_results=num_results))

        # .get() keeps one incomplete result from failing the whole search.
        formatted_results = [
            {out_key: result.get(src_key) for out_key, src_key in field_map}
            for result in results
        ]
        return {
            'success': True,
            'results': formatted_results
        }
    except Exception as e:
        logger.error(f"Error performing image search: {e}")
        return {
            'success': False,
            'error': f"Error performing image search: {str(e)}"
        }
240
+
241
async def verify_api_key(api_key: str = Depends(api_key_header)):
    """FastAPI dependency that validates the API key supplied with a request.

    Args:
        api_key: The key resolved by the ``api_key_header`` dependency.

    Returns:
        The validated key.

    Raises:
        HTTPException: 401 when the key is missing or does not match
            ``API_KEY``.
    """
    import hmac  # local import: constant-time comparison helper

    # compare_digest avoids leaking, through timing, how much of the key
    # matched. Bytes are compared so non-ASCII keys do not raise TypeError.
    if (
        not api_key
        or not API_KEY
        or not hmac.compare_digest(api_key.encode("utf-8"), API_KEY.encode("utf-8"))
    ):
        raise HTTPException(status_code=401, detail="Invalid API Key")
    return api_key
245
+
246
def download_stream(url, output_path, timeout=(10, 60)):
    """Stream the resource at ``url`` into the file ``output_path``.

    Args:
        url: Address of the stream to download.
        output_path: Path of the file to (over)write with the response body.
        timeout: ``(connect, read)`` timeout in seconds passed to ``requests``
            so that a stalled server cannot block the caller indefinitely.

    Raises:
        requests.HTTPError: When the server answers with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Range': 'bytes=0-',
        'Connection': 'keep-alive',
    }

    with requests.get(url, headers=headers, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(output_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
260
+
261
def get_best_video_stream(streams):
    """Pick the preferred video stream: 720p if available, else the best below it.

    Quality labels are parsed by their leading height, so labels carrying a
    frame-rate or HDR suffix (``'720p60'``, ``'1080p60'``, ``'1440p HDR'``) are
    understood instead of raising ``ValueError``. Streams without a parseable
    ``qualityLabel`` are ignored.

    Args:
        streams: Iterable of stream dicts with ``mimeType`` and, for video
            streams, ``qualityLabel`` entries.

    Returns:
        The chosen stream dict, or ``None`` when no video stream at or below
        720p exists. A stream labelled exactly ``'720p'`` wins over other
        720-line variants; ties keep the first stream in input order.
    """
    def _height(label):
        # '720p60' -> 720, '1440p HDR' -> 1440, anything unparseable -> None
        try:
            return int(str(label).split('p', 1)[0])
        except (TypeError, ValueError):
            return None

    candidates = []
    for stream in streams:
        if not stream.get('mimeType', '').startswith('video/'):
            continue
        label = stream.get('qualityLabel')
        height = _height(label)
        if height is not None and height <= 720:
            # Second key element prefers the plain '720p' label on a tie.
            candidates.append(((height, label == '720p'), stream))

    if not candidates:
        return None
    # max() returns the first maximal element, preserving input order on ties.
    return max(candidates, key=lambda pair: pair[0])[1]
272
+
273
def get_best_audio_stream(streams):
    """Return the audio stream with the highest bitrate, or ``None`` if there is none.

    Args:
        streams: Iterable of stream dicts. Entries whose ``mimeType`` starts
            with ``audio/`` are considered; a missing ``bitrate`` counts as 0,
            matching the selection in ``get_youtube_audio``.

    Returns:
        The chosen stream dict, or ``None`` when no audio stream is present.
    """
    audio_streams = [s for s in streams if s.get('mimeType', '').startswith('audio/')]
    return max(audio_streams, key=lambda s: s.get('bitrate') or 0, default=None)
276
+
277
@app.get("/search-videos/")
async def search_videos(
    query: str,
    num_videos: int = Query(default=2, ge=1, le=10),
    api_key: str = Depends(verify_api_key)
):
    """Endpoint: search for videos and return up to ``num_videos`` matches.

    Responds with ``{"videos": [...]}``; raises a 404 ``HTTPException`` when
    the search yields nothing (which includes a failed search).
    """
    found = await search_and_get_videos(query, num_videos)
    if found:
        return {"videos": found}
    raise HTTPException(status_code=404, detail="No videos found or an error occurred during the search.")
287
+
288
@app.get("/get-audio/{video_id}")
async def get_audio(video_id: str, api_key: str = Depends(verify_api_key)):
    """Endpoint: extract a video's audio as MP3, upload it to S3, return its URL.

    Responds with ``{"audio_url": <url>}``.

    Raises:
        HTTPException: 500 when the audio extraction or the S3 upload fails.
    """
    # NOTE(review): get_youtube_audio is synchronous and is called directly
    # from this coroutine — confirm whether it should run in a worker thread.
    result = get_youtube_audio(video_id)
    if not result['success']:
        raise HTTPException(status_code=500, detail=result['error'])

    temp_file_path = result['temp_file_path']
    s3_file_name = f"{video_id}.mp3"
    try:
        s3_url = upload_to_s3(temp_file_path, s3_file_name)
    finally:
        # Remove the temporary file whether or not the upload succeeded;
        # previously a failed upload left it on disk.
        if os.path.exists(temp_file_path):
            os.unlink(temp_file_path)

    if not s3_url:
        raise HTTPException(status_code=500, detail="Failed to upload audio to S3")
    return {"audio_url": s3_url}
302
+
303
@app.post("/get-video-streams")
async def get_video_streams(video_id: str, api_key: str = Depends(verify_api_key)):
    """Endpoint: download a video's best video and audio streams, mux them, upload to S3.

    Responds with ``{"video_url": <url>}``.

    Raises:
        HTTPException: 404 when no suitable video or audio stream exists;
            500 for download, muxing, upload or any other processing failure.
    """
    temp_paths = []

    def _new_temp_path(suffix):
        # Reserve a named temp file and close the handle right away; the
        # downloader and ffmpeg reopen it by path.
        handle = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
        handle.close()
        temp_paths.append(handle.name)
        return handle.name

    try:
        # Create an InnerTube client
        client = InnerTube("WEB")

        # Fetch video info
        video_info = client.player(video_id)

        # Get the best video and audio streams
        streams = video_info["streamingData"]["adaptiveFormats"]
        video_stream = get_best_video_stream(streams)
        audio_stream = get_best_audio_stream(streams)

        if not video_stream or not audio_stream:
            raise HTTPException(status_code=404, detail="Could not find suitable video or audio stream")

        # Download video and audio
        video_path = _new_temp_path('.mp4')
        audio_path = _new_temp_path('.m4a')
        download_stream(video_stream['url'], video_path)
        download_stream(audio_stream['url'], audio_path)

        # Combine video and audio. Both inputs are handed to ffmpeg.output();
        # chaining .input() on an existing stream is not part of the
        # ffmpeg-python API.
        output_path = _new_temp_path('.mp4')
        try:
            (
                ffmpeg
                .output(
                    ffmpeg.input(video_path),
                    ffmpeg.input(audio_path),
                    output_path,
                    vcodec='libx264',
                    acodec='aac',
                )
                .overwrite_output()
                .run(capture_stdout=True, capture_stderr=True)
            )
        except ffmpeg.Error as e:
            raise HTTPException(status_code=500, detail=f"Error combining video and audio: {e.stderr.decode()}")

        # Upload combined video to S3
        s3_file_name = f"{video_id}_combined.mp4"
        s3_url = upload_to_s3(output_path, s3_file_name)

        if s3_url:
            return {"video_url": s3_url}
        raise HTTPException(status_code=500, detail="Failed to upload video to S3")

    except HTTPException:
        # Keep deliberate status codes (e.g. the 404 above) instead of
        # rewrapping them as a generic 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing video: {str(e)}")
    finally:
        # Clean up whichever temporary files were actually created.
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)
358
+
359
@app.post("/extract-text/")
async def extract_text(file: UploadFile, api_key: str = Depends(verify_api_key)):
    """Endpoint: return the text extracted from an uploaded document.

    Responds with ``{"extracted_text": <text>}``; raises a 500
    ``HTTPException`` carrying the extractor's error message on failure.
    """
    outcome = extract_text_from_document(file)
    if outcome['success']:
        return {"extracted_text": outcome['extracted_text']}
    raise HTTPException(status_code=500, detail=outcome['error'])
365
+
366
@app.get("/image-search/")
async def image_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
    """Endpoint: run an image search and return the helper's result dict as-is.

    Raises a 500 ``HTTPException`` carrying the helper's error message when
    the search reports failure.
    """
    outcome = image_search(query, num_results)
    if outcome['success']:
        return outcome
    raise HTTPException(status_code=500, detail=outcome['error'])
372
+
373
class DuckDuckGoSearch:
    """Scraper for the DuckDuckGo HTML search page."""

    async def search(self, query: str, num_results: int = 5) -> list:
        """Fetch the HTML results page for ``query`` and parse the result entries.

        Args:
            query: Search string. Sent as a request parameter so that spaces,
                ``&`` and non-ASCII characters are URL-encoded.
            num_results: Maximum number of results to return; values of zero
                or less yield an empty list without any request being made.

        Returns:
            A list of dicts with ``title``, ``body`` and ``href`` keys. Entries
            lacking a title, URL or snippet element are skipped.

        Raises:
            Exception: When the page responds with a status other than 200.
        """
        if num_results <= 0:
            return []

        search_url = "https://html.duckduckgo.com/html/"
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Referer": "https://google.com/",
            "Cookie": "kl=wt-wt",
        }

        async with aiohttp.ClientSession() as session:
            async with session.get(search_url, params={"q": query}, headers=headers) as response:
                if response.status != 200:
                    raise Exception("Failed to fetch data from DuckDuckGo")

                html = await response.text()

        soup = BeautifulSoup(html, "html.parser")
        results = []

        for entry in soup.select(".result"):
            title = entry.select_one(".result__title .result__a")
            # Named `link` so it no longer shadows the request URL.
            link = entry.select_one(".result__url")
            desc = entry.select_one(".result__snippet")

            if not (title and link and desc):
                continue

            results.append({
                "title": title.get_text(strip=True),
                "body": desc.get_text(strip=True),
                # NOTE(review): built from the displayed URL text, as before —
                # confirm that text is always a complete, scheme-less address.
                "href": f"https://{link.get_text(strip=True)}",
            })

            if len(results) >= num_results:
                break

        return results
407
+
408
async def web_search(query: str, num_results: int = 5) -> dict:
    """Run a DuckDuckGo web search and wrap the outcome in a status dict.

    Args:
        query: Search string.
        num_results: Maximum number of results to return.

    Returns:
        ``{'success': True, 'results': [...]}`` on success, otherwise
        ``{'success': False, 'error': <message>}``.
    """
    try:
        results = await DuckDuckGoSearch().search(query, num_results)
    except Exception as e:
        # Log the failure, as image_search does, instead of swallowing it.
        logger.error(f"Error performing web search: {e}")
        return {
            'success': False,
            'error': str(e)
        }
    return {
        'success': True,
        'results': results
    }
420
+
421
+ @app.get("/web-search/")
422
+ async def web_search_endpoint(query: str, num_results: int = 5, api_key: str = Depends(verify_api_key)):
423
+ result = await web_search(query, num_results)
424
+ if not result['success']:
425
+ raise HTTPException(status_code=500, detail=result['error'])
426
+ return result
+ # Replace get_youtube_audio with this:
+ def get_youtube_audio(video_id):
427
  try:
428
  logger.info(f"Starting YouTube audio extraction for video ID: {video_id}")
429
 
 
477
  logger.info(f"Downloaded {total_size / (1024 * 1024):.2f} MB")
478
 
479
  logger.info(f"Audio download completed. Total size: {total_size / (1024 * 1024):.2f} MB")
480
+ return {'success': True, 'temp_file_path': temp_file_path}
 
 
 
 
 
 
 
 
 
481
  else:
482
  logger.error(f"Failed to download audio: HTTP {response.status_code}")
483
  return {'success': False, 'error': f"Download failed: HTTP {response.status_code}"}
484
 
485
  except Exception as e:
486
  logger.exception(f"Error fetching YouTube audio for video ID {video_id}: {str(e)}")
487
+ return {'success': False, 'error': f"Error: {str(e)}"}  # TODO: add an upload-to-S3 function and send the S3 file