fahadqazi committed · Commit 9561150 · verified · 1 Parent(s): 565aa54

Update app.py
Files changed (1): app.py (+25 -3)
app.py CHANGED

@@ -2,6 +2,8 @@ from transformers import pipeline
 import gradio as gr
 import os
 import torch
+import srt
+from datetime import timedelta



@@ -15,13 +17,33 @@ if not auth_token:
 pipe = pipeline(model="fahadqazi/whisper-small-sindhi", device=device, token=auth_token)  # change to "your-username/the-name-you-picked"

 def transcribe(audio):
-    text = pipe(audio)["text"]
-    return text
+    # Perform transcription
+    result = pipe(audio)
+    transcription = result["text"]
+
+    # Generate timestamps for transcription (You might need to tweak this to match your desired chunks)
+    segments = result.get("chunks", [])  # Assuming the model returns chunks (this depends on model and pipeline)
+
+    # Create an SRT object
+    subtitle_generator = []
+    start_time = timedelta(seconds=0)
+
+    for i, segment in enumerate(segments):
+        end_time = start_time + timedelta(seconds=segment["end"])  # Using segment['end'] to create time intervals
+        subtitle_generator.append(srt.Subtitle(index=i+1, start=start_time, end=end_time, content=segment["text"]))
+        start_time = end_time  # Update start_time for next subtitle
+
+    # Write subtitles to .srt file
+    srt_file = "output.srt"
+    with open(srt_file, "w") as f:
+        f.write(srt.compose(subtitle_generator))
+
+    return transcription, srt_file

 iface = gr.Interface(
     fn=transcribe,
     inputs=gr.Audio(type="filepath"),
-    outputs="text",
+    outputs=["text", "file"],
     title="Whisper Small Sindhi",
     description="Realtime demo for Sindhi speech recognition using a fine-tuned Whisper small model.",
 )
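
Note on the new transcribe() body: the Hugging Face ASR pipeline only returns a "chunks" key when it is called with return_timestamps=True, and each chunk carries a "timestamp" tuple of (start, end) seconds together with a "text" field, rather than a separate "end" key. Below is a minimal sketch of the subtitle step against that output format; the build_srt helper name and the output.srt path are illustrative, not part of this commit.

import srt
from datetime import timedelta

def build_srt(result, srt_path="output.srt"):
    # result comes from pipe(audio, return_timestamps=True); each entry in
    # result["chunks"] looks like {"text": "...", "timestamp": (start, end)}.
    subtitles = []
    for i, chunk in enumerate(result.get("chunks", [])):
        start, end = chunk["timestamp"]
        if end is None:  # the final chunk can be open-ended
            end = start
        subtitles.append(
            srt.Subtitle(
                index=i + 1,
                start=timedelta(seconds=start),
                end=timedelta(seconds=end),
                content=chunk["text"].strip(),
            )
        )
    # srt.compose renders the entries as a standard SubRip file
    with open(srt_path, "w", encoding="utf-8") as f:
        f.write(srt.compose(subtitles))
    return srt_path

With outputs=["text", "file"], Gradio should render the returned transcription in a textbox and offer the written .srt file as a download, matching the two values that transcribe now returns.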