JabriA committed
Commit 6c74174 · 1 Parent(s): 105a56b

Add transcription and topic extraction app

Files changed (2):
  1. app.py (+29 -9)
  2. requirements.txt (+3 -3)
app.py CHANGED
@@ -1,17 +1,36 @@
 import gradio as gr
-import whisper
-from transformers import pipeline
+import torch
+from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, pipeline
+import soundfile as sf
 
 # Load models
-model = whisper.load_model("base")
+# Transcription model for Moroccan Darija
+processor = Wav2Vec2Processor.from_pretrained("achrafkhannoussi/Wav2Vec2-Large-XLSR-53-Moroccan-Darija")
+transcription_model = Wav2Vec2ForCTC.from_pretrained("achrafkhannoussi/Wav2Vec2-Large-XLSR-53-Moroccan-Darija")
+
+# Summarization model
 summarizer = pipeline("summarization", model="t5-small")
 
+# Function to transcribe audio using Wav2Vec2
+def transcribe_audio(audio_path):
+    # Load and preprocess audio
+    audio_input, sample_rate = sf.read(audio_path)
+    inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt", padding=True)
+
+    # Get predictions
+    with torch.no_grad():
+        logits = transcription_model(**inputs).logits
+
+    # Decode predictions
+    predicted_ids = torch.argmax(logits, dim=-1)
+    transcription = processor.batch_decode(predicted_ids)[0]
+    return transcription
+
 # Function to transcribe and summarize
 def transcribe_and_summarize(audio_file):
     # Transcription
-    result = model.transcribe(audio_file)
-    transcription = result["text"]
-
+    transcription = transcribe_audio(audio_file)
+
     # Summarization
     summary = summarizer(transcription, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
     return transcription, summary
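A caveat on the summarization step this hunk keeps: t5-small accepts at most 512 input tokens, so a long transcription will overflow the encoder and trigger truncation warnings. A minimal sketch of the same call with explicit truncation (truncation= is the standard Hugging Face pipeline keyword, but verify it against the installed transformers version):

    # Ask the pipeline's tokenizer to clip inputs past t5-small's 512-token limit
    summary = summarizer(
        transcription,
        max_length=50,
        min_length=10,
        do_sample=False,
        truncation=True,
    )[0]["summary_text"]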
 
@@ -27,9 +46,10 @@ app = gr.Interface(
     fn=transcribe_and_summarize,
     inputs=inputs,
     outputs=outputs,
-    title="Audio Transcription and Summarization",
-    description="Upload an audio file to get its transcription and a summarized version of the content."
+    title="Moroccan Darija Audio Transcription and Summarization",
+    description="Upload an audio file in Moroccan Darija to get its transcription and a summarized version of the content."
 )
 
 # Launch the app
-app.launch()
+if __name__ == "__main__":
+    app.launch()
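One gap in the new transcribe_audio: XLSR-53 Wav2Vec2 checkpoints expect 16 kHz mono audio, while sf.read returns the file's native rate and channel count, and the Wav2Vec2 feature extractor rejects a sampling_rate other than the one it was configured with. A minimal loader sketch, assuming torchaudio is available for resampling (an extra dependency this commit does not add) and using a hypothetical helper name:

    import soundfile as sf
    import torch
    import torchaudio

    TARGET_SR = 16_000  # sampling rate the XLSR-53 checkpoint was trained on

    def load_audio_16k_mono(audio_path):
        # Read at the file's native rate; stereo comes back as (frames, channels)
        audio, sample_rate = sf.read(audio_path, dtype="float32")
        if audio.ndim > 1:
            audio = audio.mean(axis=1)  # down-mix to mono
        waveform = torch.from_numpy(audio)
        if sample_rate != TARGET_SR:
            # Resample so the processor accepts the input
            waveform = torchaudio.functional.resample(waveform, sample_rate, TARGET_SR)
        return waveform.numpy(), TARGET_SR

transcribe_audio could then call this helper in place of the bare sf.read and pass sampling_rate=TARGET_SR to the processor.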
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-openai-whisper
-gradio>=3.40.2
+gradio
 transformers
-torch # Required by Whisper
+torch
+soundfile
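The old requirements.txt at least pinned gradio>=3.40.2; the new one drops every version constraint, so a future major release of any dependency can break the Space without warning. A hedged example of a fully pinned file (the version numbers are illustrative of the pinning style, not taken from the commit):

    gradio==4.44.0
    transformers==4.44.2
    torch==2.4.0
    soundfile==0.12.1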