Update app.py
Browse files
app.py
CHANGED
|
@@ -37,17 +37,36 @@ def format_timestamp(seconds: float, always_include_hours: bool = False, decimal
|
|
| 37 |
return seconds
|
| 38 |
|
| 39 |
|
| 40 |
-
def
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
|
| 53 |
demo = gr.Blocks()
|
|
|
|
| 37 |
return seconds
|
| 38 |
|
| 39 |
|
| 40 |
+
def transcribe_speech(filepath):
    """Transcribe an audio file to Indonesian text with the Whisper model.

    The audio is loaded resampled to 16 kHz and processed in 30-second
    chunks (Whisper's maximum input window); the per-chunk transcriptions
    are joined with spaces into a single string.

    Args:
        filepath: Path to an audio file readable by ``librosa.load``.

    Returns:
        The full transcription as one string ("" for empty audio).
    """
    # Load the audio, resampled to Whisper's expected 16 kHz rate.
    audio, sampling_rate = librosa.load(filepath, sr=16000)

    # Whisper accepts at most ~30 seconds of audio per forward pass.
    chunk_duration = 30  # in seconds
    chunk_samples = chunk_duration * sampling_rate

    # The forced decoder prompt is loop-invariant: compute it once here
    # instead of once per chunk (original code recomputed it every pass).
    decoder_prompt = processor.get_decoder_prompt_ids(language="id", task="transcribe")

    # Process audio in chunks.
    transcription = []
    for i in range(0, len(audio), chunk_samples):
        chunk = audio[i:i + chunk_samples]

        # Convert the chunk into model input features.
        inputs = processor(audio=chunk, sampling_rate=16000, return_tensors="pt").input_features

        # Generate token ids for the chunk.
        generated_ids = model.generate(
            inputs,
            max_new_tokens=444,  # Max allowed by Whisper
            forced_decoder_ids=decoder_prompt,
        )

        # Decode and collect the chunk's text.
        chunk_transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
        transcription.append(chunk_transcription)

    # Combine all chunk transcriptions into a single string.
    return " ".join(transcription)
|
| 69 |
+
|
| 70 |
|
| 71 |
|
| 72 |
# Top-level Gradio container; the UI layout is defined on it below.
demo = gr.Blocks()
|