Spaces:

rbcurzon
/

speech-to-text

Sleeping

rbcurzon committed on Apr 6

Commit

ba04059

verified ·

1 Parent(s): 71b7415

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -14,11 +14,16 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
 # load model and processor
 model_id = "rbcurzon/whisper-small-fil"
-pipe = pipeline("automatic-speech-recognition",
-                model=model_id,
-                chunk_length_s=30,
-                device=device)
 model = load_silero_vad()
 client = genai.Client(api_key=os.environ.get("GENAI_API_KEY")) # Do not share api key
 """**FastAPI**"""
@@ -58,11 +63,17 @@ async def test(
             sampling_rate=16000
         )
         result = pipe(
             "only_speech.wav", # Transcribe audio
             batch_size=8,
             return_timestamps=True,
-            generate_kwargs={"language": "tagalog","return_timestamps": True,}
         )
         print(result)
@@ -103,4 +114,11 @@ async def test(text: str,
     result = translate(text, srcLang, tgtLang)
     print('Raw: ', text)
     print('Translated: ', result)
-    return {'translated_text': result}

 # load model and processor
 model_id = "rbcurzon/whisper-small-fil"
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model_id,
+    chunk_length_s=30,
+    device=device
+)
 model = load_silero_vad()
 client = genai.Client(api_key=os.environ.get("GENAI_API_KEY")) # Do not share api key
 """**FastAPI**"""
             sampling_rate=16000
         )
+        generate_kwargs = {
+            "language": "tagalog",
+            "return_timestamps": True,
+            "condition_on_prev_tokens": False,
+        }
         result = pipe(
             "only_speech.wav", # Transcribe audio
             batch_size=8,
             return_timestamps=True,
+            generate_kwargs=generate_kwargs
         )
         print(result)
     result = translate(text, srcLang, tgtLang)
     print('Raw: ', text)
     print('Translated: ', result)
+    result_dict = {
+        "text": text,
+        "translated_text": result,
+        "srcLang": srcLang,
+        "tgtLang": tgtLang
+    }
+    return result_dict