rbcurzon committed on
Commit
1b38b1a
·
verified ·
1 Parent(s): b5c3008

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -12
app.py CHANGED
@@ -1,22 +1,33 @@
1
  # -*- coding: utf-8 -*-
2
  import os
3
- from fastapi import FastAPI, WebSocket, UploadFile, File, HTTPException, Form
 
 
 
 
 
 
4
  from fastapi.middleware.cors import CORSMiddleware
 
5
  from google import genai
6
  from google.genai import types
7
- from silero_vad import load_silero_vad, read_audio, get_speech_timestamps, save_audio, collect_chunks
8
 
 
 
 
 
 
 
 
9
 
10
  import torch
11
- from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
12
- from fastapi.responses import FileResponse
13
- from transformers import VitsModel, AutoTokenizer
14
- import numpy as np
15
- import scipy
16
- from IPython.display import Audio
17
- import uuid
18
- import os
19
- import tempfile
20
 
21
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
22
 
@@ -78,7 +89,28 @@ async def translate_audio(
78
  srcLang: str = Form("Tagalog"),
79
  tgtLang: str = Form("Cebuano")
80
  ):
81
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  try:
83
  content = await file.read()
84
  with open(file.filename, 'wb') as f:
 
1
  # -*- coding: utf-8 -*-
2
  import os
3
+ import uuid
4
+ import tempfile
5
+ import numpy as np
6
+ import scipy.io.wavfile
7
+
8
+ from fastapi import FastAPI, UploadFile, File, HTTPException, Form
9
+ from fastapi.responses import FileResponse
10
  from fastapi.middleware.cors import CORSMiddleware
11
+
12
  from google import genai
13
  from google.genai import types
 
14
 
15
+ from silero_vad import (
16
+ load_silero_vad,
17
+ read_audio,
18
+ get_speech_timestamps,
19
+ save_audio,
20
+ collect_chunks,
21
+ )
22
 
23
  import torch
24
+ from transformers import (
25
+ WhisperProcessor,
26
+ WhisperForConditionalGeneration,
27
+ pipeline,
28
+ VitsModel,
29
+ AutoTokenizer,
30
+ )
 
 
31
 
32
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
33
 
 
89
  srcLang: str = Form("Tagalog"),
90
  tgtLang: str = Form("Cebuano")
91
  ):
92
+ """
93
+ Endpoint to translate audio files.
94
+ This endpoint accepts an audio file, processes it to remove silence, transcribes the audio,
95
+ and translates the transcribed text from the source language to the target language.
96
+ Args:
97
+ file (UploadFile): The audio file to be uploaded and processed.
98
+ srcLang (str): The source language of the audio transcription. Defaults to "Tagalog".
99
+ tgtLang (str): The target language for translation. Defaults to "Cebuano".
100
+ Returns:
101
+ dict: A dictionary containing:
102
+ - transcribed_text (str): The transcribed text from the audio.
103
+ - translated_text (str): The translated text from the source language to the target language.
104
+ - srcLang (str): The source language used for transcription.
105
+ - tgtLang (str): The target language used for translation.
106
+ Raises:
107
+ HTTPException: If an error occurs during processing, a 500 status code is returned with the error details.
108
+ Notes:
109
+ - The uploaded file is temporarily saved to disk for processing and removed after completion.
110
+ - Silence is removed from the audio file before transcription.
111
+ - The transcription and translation processes are performed asynchronously.
112
+ """
113
+
114
  try:
115
  content = await file.read()
116
  with open(file.filename, 'wb') as f: