from transformers import pipeline
import gradio as gr
import os
import torch
import srt
from datetime import timedelta
# Pick a GPU if one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

auth_token = os.environ.get("hf_token")
if not auth_token:
    raise ValueError("Hugging Face token is missing! Add it as a secret.")

pipe = pipeline(model="fahadqazi/whisper-small-sindhi", device=device, token=auth_token)  # change to "your-username/the-name-you-picked"
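# A minimal sketch (not part of this app as written): for recordings longer
# than ~30 seconds, the transformers ASR pipeline supports chunked long-form
# inference via the standard chunk_length_s argument, e.g.:
#
#   pipe = pipeline(
#       model="fahadqazi/whisper-small-sindhi",
#       device=device,
#       token=auth_token,
#       chunk_length_s=30,  # process the audio in 30-second windows
#   )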
def transcribe(audio):
    # Run the model; return_timestamps=True makes the pipeline emit
    # timestamped chunks alongside the full transcription
    result = pipe(audio, return_timestamps=True)
    transcription = result["text"]

    # Each chunk has the form {"text": ..., "timestamp": (start_s, end_s)}
    segments = result.get("chunks", [])

    # Build SRT subtitles from the chunk timestamps
    subtitles = []
    for i, segment in enumerate(segments):
        start_s, end_s = segment["timestamp"]
        # The final chunk's end timestamp can be None if the audio is cut off
        if end_s is None:
            end_s = start_s
        subtitles.append(
            srt.Subtitle(
                index=i + 1,
                start=timedelta(seconds=start_s),
                end=timedelta(seconds=end_s),
                content=segment["text"].strip(),
            )
        )

    # Write subtitles to an .srt file (UTF-8 so Sindhi script is preserved)
    srt_file = "output.srt"
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write(srt.compose(subtitles))

    return transcription, srt_file
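# Quick sanity check outside the Gradio UI (a sketch; "sample.wav" is a
# hypothetical local file):
#
#   text, path = transcribe("sample.wav")
#   print(text)
#   print(open(path, encoding="utf-8").read())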
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "file"],
    title="Whisper Small Sindhi",
    description="Realtime demo for Sindhi speech recognition using a fine-tuned Whisper small model.",
)

iface.launch()
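# On Spaces, launch() with no arguments is enough; when running locally, a
# temporary public link can be requested with the standard Gradio option:
#
#   iface.launch(share=True)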