lokesh341 committed on
Commit
9061ed1
·
verified ·
1 Parent(s): 0076e70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -12
app.py CHANGED
@@ -1,22 +1,25 @@
1
  from flask import Flask, render_template, request, jsonify
2
- import torch
3
- from transformers import pipeline
4
- from gtts import gTTS
5
  import os
 
 
6
  import re
 
 
 
 
7
 
8
  app = Flask(__name__)
9
 
10
- # Load Whisper Model for English Transcription
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
- asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=0 if device == "cuda" else -1)
13
 
14
  # Function to generate audio prompts
15
  def generate_audio_prompt(text, filename):
16
  tts = gTTS(text=text, lang="en")
17
  tts.save(os.path.join("static", filename))
18
 
19
- # Generate audio prompts
20
  prompts = {
21
  "welcome": "Welcome to Biryani Hub.",
22
  "ask_name": "Tell me your name.",
@@ -27,9 +30,36 @@ prompts = {
27
  for key, text in prompts.items():
28
  generate_audio_prompt(text, f"{key}.mp3")
29
 
30
- # Clean transcribed text to allow only English letters, numbers, and basic punctuation
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def clean_transcription(text):
32
- return re.sub(r"[^a-zA-Z0-9@.\s]", "", text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  @app.route("/")
35
  def index():
@@ -45,12 +75,16 @@ def transcribe():
45
  audio_file.save(audio_path)
46
 
47
  try:
48
- # Transcribe audio to text
49
- result = asr_model(audio_path, generate_kwargs={"language": "en"})
 
 
50
  transcribed_text = clean_transcription(result["text"])
 
51
  return jsonify({"text": transcribed_text})
52
  except Exception as e:
53
- return jsonify({"error": str(e)}), 500
54
 
 
55
  if __name__ == "__main__":
56
- app.run(host="0.0.0.0", port=5000, debug=True)
 
1
  from flask import Flask, render_template, request, jsonify
 
 
 
2
  import os
3
+ import torch
4
+ import whisper
5
  import re
6
+ from pydub import AudioSegment
7
+ from pydub.silence import detect_nonsilent
8
+ from waitress import serve
9
+ from gtts import gTTS
10
 
11
  app = Flask(__name__)
12
 
13
# Load Whisper speech-recognition model; prefer GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# "medium" trades speed for accuracy; switch to "large" for better accuracy.
# Pass `device` explicitly — in the original code the computed `device`
# value was never used, leaving the placement to whisper's internal default.
whisper_model = whisper.load_model("medium", device=device)
16
 
17
# Helper: synthesize a spoken prompt and store it in the static folder
def generate_audio_prompt(text, filename):
    """Render *text* as English speech via gTTS and save it as static/<filename>."""
    speech = gTTS(text=text, lang="en")
    speech.save(os.path.join("static", filename))
21
 
22
+ # Generate voice prompts
23
  prompts = {
24
  "welcome": "Welcome to Biryani Hub.",
25
  "ask_name": "Tell me your name.",
 
30
  for key, text in prompts.items():
31
  generate_audio_prompt(text, f"{key}.mp3")
32
 
33
# Symbol mapping for proper recognition of spoken punctuation.
# NOTE: multi-word phrases ("at the rate") come first so they are applied
# before their shorter substrings ("at") — dicts preserve insertion order.
SYMBOL_MAPPING = {
    "at the rate": "@",
    "at": "@",
    "dot": ".",
    "underscore": "_",
    "hash": "#",
    "plus": "+",
    "dash": "-",
    "comma": ",",
    "space": " "
}

# Function to clean and format transcribed text
def clean_transcription(text):
    """Normalize a Whisper transcription for form-field use.

    Lowercases and strips *text*, converts spoken symbol names (e.g. "dot",
    "at the rate") to their characters, and capitalizes the first letter.

    Whole-word matching (\\b boundaries) is used so that words merely
    containing a mapped phrase are not corrupted — the previous
    substring-based ``str.replace`` turned "what" into "wh@" and
    "update" into "upd@e".
    """
    text = text.lower().strip()
    for phrase, symbol in SYMBOL_MAPPING.items():
        text = re.sub(rf"\b{re.escape(phrase)}\b", symbol, text)
    return text.capitalize()
52
+
53
# Helper: strip leading/trailing silence from a recorded WAV in place
def trim_silence(audio_path):
    """Detect the non-silent span of the WAV at *audio_path* and rewrite the
    file so only that span remains. If no speech is detected, the file is
    left untouched.
    """
    segment = AudioSegment.from_wav(audio_path)
    # Anything quieter than 16 dB below the clip's average loudness counts
    # as silence; gaps shorter than 500 ms are ignored.
    speech_ranges = detect_nonsilent(
        segment, min_silence_len=500, silence_thresh=segment.dBFS - 16
    )
    if not speech_ranges:
        return
    start = speech_ranges[0][0]
    end = speech_ranges[-1][1]
    segment[start:end].export(audio_path, format="wav")  # overwrite with trimmed audio
63
 
64
  @app.route("/")
65
  def index():
 
75
  audio_file.save(audio_path)
76
 
77
  try:
78
+ trim_silence(audio_path) # Remove silence before processing
79
+
80
+ # Transcribe using Whisper
81
+ result = whisper_model.transcribe(audio_path, language="english")
82
  transcribed_text = clean_transcription(result["text"])
83
+
84
  return jsonify({"text": transcribed_text})
85
  except Exception as e:
86
+ return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
87
 
88
# Run under Waitress, a production-grade WSGI server, instead of Flask's
# built-in development server.
if __name__ == "__main__":
    serve(app, host="0.0.0.0", port=7860)