geethareddy committed on
Commit
d445f81
·
verified ·
1 Parent(s): 0ebd9ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -10
app.py CHANGED
@@ -1,23 +1,26 @@
1
  from flask import Flask, render_template, request, jsonify
 
2
  import torch
 
3
  from transformers import pipeline
4
  from gtts import gTTS
5
- import os
6
  import re
7
- from waitress import serve # Use Waitress for Production
 
 
8
 
9
  app = Flask(__name__)
10
 
11
- # Load Whisper Model for English Transcription
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
13
- asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=0 if device == "cuda" else -1)
14
 
15
  # Function to generate audio prompts
16
  def generate_audio_prompt(text, filename):
17
  tts = gTTS(text=text, lang="en")
18
  tts.save(os.path.join("static", filename))
19
 
20
- # Generate audio prompts
21
  prompts = {
22
  "welcome": "Welcome to Biryani Hub.",
23
  "ask_name": "Tell me your name.",
@@ -28,7 +31,7 @@ prompts = {
28
  for key, text in prompts.items():
29
  generate_audio_prompt(text, f"{key}.mp3")
30
 
31
- # Mapping for correctly converting spoken symbols to text
32
  SYMBOL_MAPPING = {
33
  "at the rate": "@",
34
  "at": "@",
@@ -41,12 +44,24 @@ SYMBOL_MAPPING = {
41
  "space": " "
42
  }
43
 
44
- # Function to clean and process transcribed text
45
  def clean_transcription(text):
46
  text = text.lower()
 
47
  for word, symbol in SYMBOL_MAPPING.items():
48
  text = text.replace(word, symbol)
49
- return text
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  @app.route("/")
52
  def index():
@@ -62,13 +77,16 @@ def transcribe():
62
  audio_file.save(audio_path)
63
 
64
  try:
65
- # Transcribe audio to text with Whisper
 
 
66
  result = asr_model(audio_path, generate_kwargs={"language": "en"})
67
  transcribed_text = clean_transcription(result["text"])
 
68
  return jsonify({"text": transcribed_text})
69
  except Exception as e:
70
  return jsonify({"error": str(e)}), 500
71
 
72
- # Run Waitress Server for Production
73
  if __name__ == "__main__":
74
  serve(app, host="0.0.0.0", port=7860)
 
1
  from flask import Flask, render_template, request, jsonify
2
+ import os
3
  import torch
4
+ import speech_recognition as sr
5
  from transformers import pipeline
6
  from gtts import gTTS
 
7
  import re
8
+ from pydub import AudioSegment
9
+ from pydub.silence import detect_nonsilent
10
+ from waitress import serve
11
 
12
  app = Flask(__name__)
13
 
14
+ # Load Whisper Model for Accurate Speech-to-Text
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
+ asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
17
 
18
  # Function to generate audio prompts
19
def generate_audio_prompt(text, filename):
    """Synthesize *text* to speech with gTTS and save it under static/.

    Args:
        text: English sentence to speak.
        filename: Target file name inside static/ (e.g. "welcome.mp3").
    """
    # Fix: gTTS.save raises FileNotFoundError if static/ is absent
    # (fresh checkout / clean container); create it on demand.
    os.makedirs("static", exist_ok=True)
    tts = gTTS(text=text, lang="en")
    tts.save(os.path.join("static", filename))
22
 
23
+ # Generate all required voice prompts
24
  prompts = {
25
  "welcome": "Welcome to Biryani Hub.",
26
  "ask_name": "Tell me your name.",
 
31
  for key, text in prompts.items():
32
  generate_audio_prompt(text, f"{key}.mp3")
33
 
34
+ # Symbol mapping for better recognition
35
  SYMBOL_MAPPING = {
36
  "at the rate": "@",
37
  "at": "@",
 
44
  "space": " "
45
  }
46
 
47
+ # Function to clean and format transcribed text properly
48
def clean_transcription(text, symbol_mapping=None):
    """Normalize a Whisper transcription for form filling.

    Lowercases, collapses whitespace runs, converts spoken symbol names
    ("at the rate", "dot", ...) to their literal symbols, and capitalizes
    the first character.

    Args:
        text: Raw transcription text.
        symbol_mapping: Optional {spoken phrase: symbol} mapping; defaults
            to the module-level SYMBOL_MAPPING.

    Returns:
        The cleaned, capitalized string.
    """
    mapping = SYMBOL_MAPPING if symbol_mapping is None else symbol_mapping
    text = text.lower()
    text = re.sub(r"\s+", " ", text).strip()  # collapse extra spaces
    # Fix: plain str.replace matched substrings inside words ("nathan" ->
    # "n@han") and relied on dict insertion order so that "at the rate"
    # was consumed before "at". Use word-boundary regexes, longest phrase
    # first, so only whole spoken phrases are converted.
    for phrase, symbol in sorted(mapping.items(), key=lambda kv: -len(kv[0])):
        text = re.sub(rf"\b{re.escape(phrase)}\b", lambda _m, s=symbol: s, text)
    return text.capitalize()
54
+
55
+ # Function to detect speech duration and avoid cutting words
56
# Trim leading/trailing silence so Whisper does not cut off words.
def trim_silence(audio_path):
    """Strip leading and trailing silence from the WAV at *audio_path*, in place."""
    segment = AudioSegment.from_wav(audio_path)
    # Anything quieter than 16 dB below the clip's average loudness, lasting
    # at least 700 ms, counts as silence.
    voiced = detect_nonsilent(segment, min_silence_len=700, silence_thresh=segment.dBFS - 16)
    if not voiced:
        return  # all silence (or detection found nothing): leave file untouched
    start, end = voiced[0][0], voiced[-1][1]
    segment[start:end].export(audio_path, format="wav")  # overwrite with trimmed audio
65
 
66
  @app.route("/")
67
  def index():
 
77
  audio_file.save(audio_path)
78
 
79
  try:
80
+ trim_silence(audio_path) # Trim silence before processing
81
+
82
+ # Force Whisper to transcribe only in English
83
  result = asr_model(audio_path, generate_kwargs={"language": "en"})
84
  transcribed_text = clean_transcription(result["text"])
85
+
86
  return jsonify({"text": transcribed_text})
87
  except Exception as e:
88
  return jsonify({"error": str(e)}), 500
89
 
90
+ # Use Waitress for Production Server
91
if __name__ == "__main__":
    # Production entry point: Waitress WSGI server on all interfaces.
    # NOTE(review): port 7860 looks like the hosting platform's expected
    # port — confirm before deploying elsewhere.
    server_kwargs = {"host": "0.0.0.0", "port": 7860}
    serve(app, **server_kwargs)