Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,8 @@ from transformers import pipeline
|
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
import torch
|
|
|
|
|
5 |
|
6 |
|
7 |
|
@@ -15,13 +17,33 @@ if not auth_token:
|
|
15 |
pipe = pipeline(model="fahadqazi/whisper-small-sindhi", device=device, token=auth_token) # change to "your-username/the-name-you-picked"
|
16 |
|
17 |
def transcribe(audio):
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
# Gradio front end (pre-change version): a file-path audio input is handed to
# `transcribe`, and its result is shown in a single text output.
iface = gr.Interface(
    title="Whisper Small Sindhi",
    description="Realtime demo for Sindhi speech recognition using a fine-tuned Whisper small model.",
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
)
|
|
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
import torch
|
5 |
+
import srt
|
6 |
+
from datetime import timedelta
|
7 |
|
8 |
|
9 |
|
|
|
17 |
pipe = pipeline(model="fahadqazi/whisper-small-sindhi", device=device, token=auth_token) # change to "your-username/the-name-you-picked"
|
18 |
|
19 |
def _chunks_to_subtitles(chunks):
    """Convert timestamped pipeline chunks into a list of ``srt.Subtitle`` cues.

    Each chunk is expected to be a mapping with a ``"text"`` entry and a
    ``"timestamp"`` entry holding ``(start, end)`` in seconds, measured from
    the start of the audio (the shape the Transformers ASR pipeline emits when
    timestamps are requested -- confirm against the installed version).
    """
    subtitles = []
    previous_end = 0.0
    for chunk in chunks:
        text = (chunk.get("text") or "").strip()
        start, end = chunk.get("timestamp") or (None, None)
        # A boundary may be reported as None; fall back to the closest known
        # time so the cue stays well-formed instead of crashing timedelta().
        if start is None:
            start = previous_end
        if end is None or end < start:
            end = start
        previous_end = end
        if not text:
            # Nothing to display for this chunk; keep the clock, skip the cue.
            continue
        subtitles.append(
            srt.Subtitle(
                index=len(subtitles) + 1,
                start=timedelta(seconds=start),
                end=timedelta(seconds=end),
                content=text,
            )
        )
    return subtitles


def transcribe(audio):
    """Transcribe an audio file and write matching SRT subtitles.

    Args:
        audio: Path to the audio file (the Gradio input uses
            ``type="filepath"``). ``None`` or an empty path means nothing
            was submitted.

    Returns:
        A ``(transcription, srt_path)`` tuple: the full transcript text and
        the path of a UTF-8 ``.srt`` file containing one cue per timestamped
        chunk. Returns ``("", None)`` when no audio was provided.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import tempfile

    if not audio:
        return "", None

    # Timestamps must be requested explicitly; without this flag the result
    # carries no "chunks" and the subtitle file would always be empty.
    result = pipe(audio, return_timestamps=True)
    transcription = result["text"]
    subtitles = _chunks_to_subtitles(result.get("chunks") or [])

    # A unique temp file per call avoids concurrent requests overwriting each
    # other's subtitles; explicit UTF-8 is required for Sindhi script.
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".srt", encoding="utf-8", delete=False
    ) as srt_handle:
        srt_handle.write(srt.compose(subtitles))

    return transcription, srt_handle.name
|
42 |
|
43 |
# Gradio front end: a file-path audio input is handed to `transcribe`, whose
# two return values populate a text output and a downloadable file output.
iface = gr.Interface(
    title="Whisper Small Sindhi",
    description="Realtime demo for Sindhi speech recognition using a fine-tuned Whisper small model.",
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "file"],
)
|