Spaces:

rbcurzon
/

speech-to-text

Running

App Files Files Community

rbcurzon committed on Mar 30

Commit

ad2f36e

verified ·

1 Parent(s): abdeb75

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -19

app.py CHANGED Viewed

@@ -42,14 +42,15 @@ def translate(text, srcLang, tgtLang):
     print(response)
     return response.text
 from tempfile import NamedTemporaryFile
 from fastapi import UploadFile, Form, File
 from pathlib import Path
 from typing import Annotated
-import shutil
-import aiofiles
-# def save_upload_file_tmp(upload_file: UploadFile) -> Path:
 app = FastAPI(
     title="Real-Time Audio Processor",
@@ -66,26 +67,47 @@ async def test(file: UploadFile = File(...),
     #     content = await file.read()  # async read
     #     await out_file.write(content)  # async write
     try:
-        content = await file.read()
-        with open("temp_audio.wav", 'wb') as f:
             f.write(content)
-    except Exception:
-        raise HTTPException(status_code=500, detail='Something went wrong')
     finally:
-        file.file.close()
-    print(f"Successfully uploaded {file.filename}")
-    result = pipe("temp_audio.wav",
-                  batch_size=8,
-                  return_timestamps=True,
-                  generate_kwargs={"language": "tagalog",
-                                   "return_timestamps": True,})
-    translatedResult = translate(result['text'], srcLang=srcLang, tgtLang=tgtLang)
-    os.remove("temp_audio.wav")
-    return {"transcribed_text": result['text'], "translated_text": translatedResult, "srcLang": srcLang, "tgtLang": tgtLang}
 @app.post("/translateText/")
 async def test(text: str,

     print(response)
     return response.text
+import shutil
+import aiofiles
 from tempfile import NamedTemporaryFile
 from fastapi import UploadFile, Form, File
 from pathlib import Path
 from typing import Annotated
+from silero_vad import load_silero_vad, read_audio, get_speech_timestamps, save_audio, collect_chunks
+model = load_silero_vad()
 app = FastAPI(
     title="Real-Time Audio Processor",
     #     content = await file.read()  # async read
     #     await out_file.write(content)  # async write
     try:
+        content = await file.read()
+        with open(file.filename, 'wb') as f:
             f.write(content)
+            print(f"Successfully uploaded {file.filename}")
+        wav = read_audio(file.filename)
+        speech_timestamps = get_speech_timestamps(wav, model)
+        save_audio(
+            "only_speech.wav",
+            collect_chunks(speech_timestamps, wav),
+            sampling_rate=16000
+        )
+        result = pipe(
+            "only_speech.wav", # Transcribe audio
+            batch_size=8,
+            return_timestamps=True,
+            generate_kwargs={"language": "tagalog","return_timestamps": True,}
+        )
+        translatedResult = translate(result['text'], srcLang=srcLang, tgtLang=tgtLang)
+        return {
+            "transcribed_text": result['text'],
+            "translated_text": translatedResult,
+            "srcLang": srcLang,
+            "tgtLang": tgtLang
+        }
+    except Exception as error:
+        print("Error: ", str(error))
+        raise HTTPException(status_code=500, detail=str(error))
     finally:
+        if file.file:
+            file.file.close()
+        if os.path.exists(file.filename):
+            os.remove(file.filename)
+        if os.path.exists("only_speech.wav"):
+            os.remove("only_speech.wav")
 @app.post("/translateText/")
 async def test(text: str,