Update app.py
Browse files
app.py
CHANGED
|
@@ -20,6 +20,7 @@ import boto3
|
|
| 20 |
from botocore.exceptions import NoCredentialsError
|
| 21 |
import time
|
| 22 |
import tempfile
|
|
|
|
| 23 |
|
| 24 |
# Import functions from other modules
|
| 25 |
from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
|
|
@@ -81,49 +82,49 @@ def extract_audio_from_file(input_bytes):
|
|
| 81 |
temp_file_path = temp_file.name
|
| 82 |
|
| 83 |
try:
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
try:
|
| 89 |
-
# Try to read as a video file
|
| 90 |
video = VideoFileClip(temp_file_path)
|
| 91 |
audio = video.audio
|
| 92 |
if audio is not None:
|
| 93 |
-
# Extract audio from video
|
| 94 |
audio_array = audio.to_soundarray()
|
| 95 |
sample_rate = audio.fps
|
| 96 |
-
|
| 97 |
-
# Convert to mono if stereo
|
| 98 |
-
if len(audio_array.shape) > 1 and audio_array.shape[1] > 1:
|
| 99 |
-
audio_array = audio_array.mean(axis=1)
|
| 100 |
-
|
| 101 |
-
# Ensure audio is float32 and normalized
|
| 102 |
audio_array = audio_array.astype(np.float32)
|
| 103 |
audio_array /= np.max(np.abs(audio_array))
|
| 104 |
-
|
| 105 |
video.close()
|
|
|
|
| 106 |
return audio_array, sample_rate
|
| 107 |
else:
|
| 108 |
-
|
| 109 |
-
except Exception:
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
raise ValueError(f"Unsupported file format: {str(e)}")
|
| 125 |
finally:
|
| 126 |
-
# Clean up the temporary file
|
| 127 |
os.unlink(temp_file_path)
|
| 128 |
|
| 129 |
@app.post("/transcribe")
|
|
@@ -342,7 +343,8 @@ async def identify_language_file(
|
|
| 342 |
processing_time = time.time() - start_time
|
| 343 |
return JSONResponse(
|
| 344 |
status_code=500,
|
| 345 |
-
content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time}
|
|
|
|
| 346 |
|
| 347 |
@app.post("/asr_languages")
|
| 348 |
async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
|
|
@@ -392,12 +394,10 @@ async def get_tts_languages(request: LanguageRequest, api_key: APIKey = Depends(
|
|
| 392 |
content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
|
| 393 |
)
|
| 394 |
|
| 395 |
-
# If you want to add a health check endpoint
|
| 396 |
@app.get("/health")
|
| 397 |
async def health_check():
|
| 398 |
return {"status": "ok"}
|
| 399 |
|
| 400 |
-
# You might also want to add a root endpoint that provides basic API information
|
| 401 |
@app.get("/")
|
| 402 |
async def root():
|
| 403 |
return {
|
|
|
|
| 20 |
from botocore.exceptions import NoCredentialsError
|
| 21 |
import time
|
| 22 |
import tempfile
|
| 23 |
+
import magic
|
| 24 |
|
| 25 |
# Import functions from other modules
|
| 26 |
from asr import transcribe, ASR_LANGUAGES, ASR_SAMPLING_RATE
|
|
|
|
| 82 |
temp_file_path = temp_file.name
|
| 83 |
|
| 84 |
try:
|
| 85 |
+
# Log file info
|
| 86 |
+
file_info = magic.from_file(temp_file_path, mime=True)
|
| 87 |
+
logger.info(f"Received file of type: {file_info}")
|
| 88 |
+
|
| 89 |
+
# Try reading with soundfile first
|
| 90 |
+
try:
|
| 91 |
+
audio_array, sample_rate = sf.read(temp_file_path)
|
| 92 |
+
logger.info(f"Successfully read audio with soundfile. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
|
| 93 |
+
return audio_array, sample_rate
|
| 94 |
+
except Exception as e:
|
| 95 |
+
logger.info(f"Could not read with soundfile: {str(e)}")
|
| 96 |
+
|
| 97 |
+
# Try reading as video
|
| 98 |
try:
|
|
|
|
| 99 |
video = VideoFileClip(temp_file_path)
|
| 100 |
audio = video.audio
|
| 101 |
if audio is not None:
|
|
|
|
| 102 |
audio_array = audio.to_soundarray()
|
| 103 |
sample_rate = audio.fps
|
| 104 |
+
audio_array = audio_array.mean(axis=1) if len(audio_array.shape) > 1 and audio_array.shape[1] > 1 else audio_array
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
audio_array = audio_array.astype(np.float32)
|
| 106 |
audio_array /= np.max(np.abs(audio_array))
|
|
|
|
| 107 |
video.close()
|
| 108 |
+
logger.info(f"Successfully extracted audio from video. Shape: {audio_array.shape}, Sample rate: {sample_rate}")
|
| 109 |
return audio_array, sample_rate
|
| 110 |
else:
|
| 111 |
+
logger.info("Video file contains no audio")
|
| 112 |
+
except Exception as e:
|
| 113 |
+
logger.info(f"Could not read as video: {str(e)}")
|
| 114 |
+
|
| 115 |
+
# Try reading with pydub
|
| 116 |
+
try:
|
| 117 |
+
audio = AudioSegment.from_file(temp_file_path)
|
| 118 |
+
audio_array = np.array(audio.get_array_of_samples())
|
| 119 |
+
audio_array = audio_array.astype(np.float32) / (2**15 if audio.sample_width == 2 else 2**7)
|
| 120 |
+
audio_array = audio_array.reshape((-1, 2)).mean(axis=1) if audio.channels == 2 else audio_array
|
| 121 |
+
logger.info(f"Successfully read audio with pydub. Shape: {audio_array.shape}, Sample rate: {audio.frame_rate}")
|
| 122 |
+
return audio_array, audio.frame_rate
|
| 123 |
+
except Exception as e:
|
| 124 |
+
logger.info(f"Could not read with pydub: {str(e)}")
|
| 125 |
+
|
| 126 |
+
raise ValueError(f"Unsupported file format: {file_info}")
|
|
|
|
| 127 |
finally:
|
|
|
|
| 128 |
os.unlink(temp_file_path)
|
| 129 |
|
| 130 |
@app.post("/transcribe")
|
|
|
|
| 343 |
processing_time = time.time() - start_time
|
| 344 |
return JSONResponse(
|
| 345 |
status_code=500,
|
| 346 |
+
content={"message": "An error occurred during language identification", "details": error_details, "processing_time_seconds": processing_time}
|
| 347 |
+
)
|
| 348 |
|
| 349 |
@app.post("/asr_languages")
|
| 350 |
async def get_asr_languages(request: LanguageRequest, api_key: APIKey = Depends(get_api_key)):
|
|
|
|
| 394 |
content={"message": "An error occurred while fetching TTS languages", "details": error_details, "processing_time_seconds": processing_time}
|
| 395 |
)
|
| 396 |
|
|
|
|
| 397 |
@app.get("/health")
|
| 398 |
async def health_check():
|
| 399 |
return {"status": "ok"}
|
| 400 |
|
|
|
|
| 401 |
@app.get("/")
|
| 402 |
async def root():
|
| 403 |
return {
|