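"""Gradio Space: Sindhi speech recognition with a fine-tuned Whisper-small model.

Transcribes uploaded or recorded audio and returns both the text and a
downloadable .srt subtitle file.
"""
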
import os
from datetime import timedelta

import gradio as gr
import srt
import torch
from transformers import pipeline
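
# Prefer the GPU when one is available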
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The token must be set as a Space secret named "hf_token"
auth_token = os.environ.get("hf_token")
if not auth_token:
    raise ValueError("Hugging Face token is missing! Add it as a secret named 'hf_token'.")
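
# Load the fine-tuned Sindhi Whisper checkpoint as an ASR pipeline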
pipe = pipeline(
    "automatic-speech-recognition",
    model="fahadqazi/whisper-small-sindhi",
    device=device,
    token=auth_token,
    chunk_length_s=30,  # chunked inference so clips longer than 30 s are handled
)


def transcribe(audio):
    # Ask the pipeline for chunk-level timestamps so subtitles can be built
    result = pipe(audio, return_timestamps=True)
    transcription = result["text"]

    # Each chunk has the form {"timestamp": (start, end), "text": "..."}
    segments = result.get("chunks", [])

    # Build SRT entries from the absolute chunk timestamps
    subtitles = []
    for i, segment in enumerate(segments):
        start, end = segment["timestamp"]
        # Whisper occasionally returns None for the final end timestamp
        if end is None:
            end = start
        subtitles.append(
            srt.Subtitle(
                index=i + 1,
                start=timedelta(seconds=start),
                end=timedelta(seconds=end),
                content=segment["text"].strip(),
            )
        )

    # Write the subtitles to disk (UTF-8 for the Sindhi script)
    srt_file = "output.srt"
    with open(srt_file, "w", encoding="utf-8") as f:
        f.write(srt.compose(subtitles))

    return transcription, srt_file
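
# Gradio UI: audio in (upload or microphone), transcription text and .srt file out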
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "file"],
    title="Whisper Small Sindhi",
    description="Real-time demo for Sindhi speech recognition using a fine-tuned Whisper-small model.",
)
iface.launch()