Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,22 +1,33 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import os
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
from fastapi.middleware.cors import CORSMiddleware
|
|
|
5 |
from google import genai
|
6 |
from google.genai import types
|
7 |
-
from silero_vad import load_silero_vad, read_audio, get_speech_timestamps, save_audio, collect_chunks
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
|
10 |
import torch
|
11 |
-
from transformers import
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
import os
|
19 |
-
import tempfile
|
20 |
|
21 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
22 |
|
@@ -78,7 +89,28 @@ async def translate_audio(
|
|
78 |
srcLang: str = Form("Tagalog"),
|
79 |
tgtLang: str = Form("Cebuano")
|
80 |
):
|
81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
try:
|
83 |
content = await file.read()
|
84 |
with open(file.filename, 'wb') as f:
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
import os
|
3 |
+
import uuid
|
4 |
+
import tempfile
|
5 |
+
import numpy as np
|
6 |
+
import scipy.io.wavfile
|
7 |
+
|
8 |
+
from fastapi import FastAPI, UploadFile, File, HTTPException, Form
|
9 |
+
from fastapi.responses import FileResponse
|
10 |
from fastapi.middleware.cors import CORSMiddleware
|
11 |
+
|
12 |
from google import genai
|
13 |
from google.genai import types
|
|
|
14 |
|
15 |
+
from silero_vad import (
|
16 |
+
load_silero_vad,
|
17 |
+
read_audio,
|
18 |
+
get_speech_timestamps,
|
19 |
+
save_audio,
|
20 |
+
collect_chunks,
|
21 |
+
)
|
22 |
|
23 |
import torch
|
24 |
+
from transformers import (
|
25 |
+
WhisperProcessor,
|
26 |
+
WhisperForConditionalGeneration,
|
27 |
+
pipeline,
|
28 |
+
VitsModel,
|
29 |
+
AutoTokenizer,
|
30 |
+
)
|
|
|
|
|
31 |
|
32 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
33 |
|
|
|
89 |
srcLang: str = Form("Tagalog"),
|
90 |
tgtLang: str = Form("Cebuano")
|
91 |
):
|
92 |
+
"""
|
93 |
+
Endpoint to translate audio files.
|
94 |
+
This endpoint accepts an audio file, processes it to remove silence, transcribes the audio,
|
95 |
+
and translates the transcribed text from the source language to the target language.
|
96 |
+
Args:
|
97 |
+
file (UploadFile): The audio file to be uploaded and processed.
|
98 |
+
srcLang (str): The source language of the audio transcription. Defaults to "Tagalog".
|
99 |
+
tgtLang (str): The target language for translation. Defaults to "Cebuano".
|
100 |
+
Returns:
|
101 |
+
dict: A dictionary containing:
|
102 |
+
- transcribed_text (str): The transcribed text from the audio.
|
103 |
+
- translated_text (str): The translated text from the source language to the target language.
|
104 |
+
- srcLang (str): The source language used for transcription.
|
105 |
+
- tgtLang (str): The target language used for translation.
|
106 |
+
Raises:
|
107 |
+
HTTPException: If an error occurs during processing, a 500 status code is returned with the error details.
|
108 |
+
Notes:
|
109 |
+
- The uploaded file is temporarily saved to disk for processing and removed after completion.
|
110 |
+
- Silence is removed from the audio file before transcription.
|
111 |
+
- The transcription and translation processes are performed asynchronously.
|
112 |
+
"""
|
113 |
+
|
114 |
try:
|
115 |
content = await file.read()
|
116 |
with open(file.filename, 'wb') as f:
|