geethareddy committed on
Commit
5207ead
·
verified ·
1 Parent(s): 4d5b8ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -16
app.py CHANGED
@@ -1,7 +1,6 @@
1
  from flask import Flask, render_template, request, jsonify
2
  import os
3
  import torch
4
- import speech_recognition as sr
5
  from transformers import pipeline
6
  from gtts import gTTS
7
  import re
@@ -11,7 +10,7 @@ from waitress import serve
11
 
12
  app = Flask(__name__)
13
 
14
- # Load Whisper Model for Accurate Speech-to-Text
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
  asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
17
 
@@ -31,7 +30,7 @@ prompts = {
31
  for key, text in prompts.items():
32
  generate_audio_prompt(text, f"{key}.mp3")
33
 
34
- # Symbol mapping for better recognition
35
  SYMBOL_MAPPING = {
36
  "at the rate": "@",
37
  "at": "@",
@@ -46,22 +45,16 @@ SYMBOL_MAPPING = {
46
 
47
# Function to clean and format transcribed text properly
def clean_transcription(text):
    """Normalize raw ASR output: lowercase, collapse runs of whitespace,
    map spoken symbol names (e.g. "at the rate") to their characters via
    SYMBOL_MAPPING, and capitalize the first letter."""
    normalized = re.sub(r"\s+", " ", text.lower()).strip()
    for spoken, symbol in SYMBOL_MAPPING.items():
        normalized = normalized.replace(spoken, symbol)
    return normalized.capitalize()
54
 
55
# Function to detect speech duration and avoid cutting words
def trim_silence(audio_path):
    """Rewrite the WAV at *audio_path* in place with leading/trailing
    silence removed. If no non-silent region is found, the file is left
    untouched."""
    clip = AudioSegment.from_wav(audio_path)
    voiced = detect_nonsilent(clip, min_silence_len=700, silence_thresh=clip.dBFS - 16)
    if not voiced:
        return  # nothing but silence — keep the original file as-is
    start, end = voiced[0][0], voiced[-1][1]
    clip[start:end].export(audio_path, format="wav")  # overwrite with trimmed audio
65
 
66
  @app.route("/")
67
  def index():
@@ -77,7 +70,9 @@ def transcribe():
77
  audio_file.save(audio_path)
78
 
79
  try:
80
- trim_silence(audio_path) # Trim silence before processing
 
 
81
 
82
  # Force Whisper to transcribe only in English
83
  result = asr_model(audio_path, generate_kwargs={"language": "en"})
@@ -85,8 +80,8 @@ def transcribe():
85
 
86
  return jsonify({"text": transcribed_text})
87
  except Exception as e:
88
- return jsonify({"error": str(e)}), 500
89
 
90
# Use Waitress for Production Server
if __name__ == "__main__":
    # Listen on all interfaces; port 7860 is the Hugging Face Spaces default.
    serve(app, host="0.0.0.0", port=7860)
 
1
  from flask import Flask, render_template, request, jsonify
2
  import os
3
  import torch
 
4
  from transformers import pipeline
5
  from gtts import gTTS
6
  import re
 
10
 
11
  app = Flask(__name__)
12
 
13
+ # Load Whisper Model (Use whisper-small for better performance)
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
  asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
16
 
 
30
  for key, text in prompts.items():
31
  generate_audio_prompt(text, f"{key}.mp3")
32
 
33
+ # Symbol mapping for proper recognition
34
  SYMBOL_MAPPING = {
35
  "at the rate": "@",
36
  "at": "@",
 
45
 
46
# Function to clean and format transcribed text properly
def clean_transcription(text):
    """Lowercase/trim ASR text, map spoken symbol names to characters,
    and capitalize the result.

    Fix: the previous bare ``str.replace`` matched substrings, so a key
    like ``"at"`` corrupted words containing it ("chat" -> "ch@").
    Whole-word regex matching avoids that, and longer phrases are
    replaced first so "at the rate" wins over "at".
    NOTE(review): assumes SYMBOL_MAPPING keys start/end with word
    characters so ``\\b`` anchors apply — confirm against the mapping.
    """
    cleaned = text.lower().strip()
    for spoken in sorted(SYMBOL_MAPPING, key=len, reverse=True):
        cleaned = re.sub(rf"\b{re.escape(spoken)}\b", SYMBOL_MAPPING[spoken], cleaned)
    return cleaned.capitalize()
52
 
53
# Function to check if the audio contains actual speech
def is_silent_audio(audio_path):
    """Return True when the WAV at *audio_path* contains no detectable
    non-silent region (i.e. the recording is effectively silence)."""
    clip = AudioSegment.from_wav(audio_path)
    voiced_spans = detect_nonsilent(clip, min_silence_len=700, silence_thresh=clip.dBFS - 16)
    # Empty list of non-silent spans means the clip is all silence.
    return not voiced_spans
 
 
 
 
 
58
 
59
  @app.route("/")
60
  def index():
 
70
  audio_file.save(audio_path)
71
 
72
  try:
73
+ # Check if audio contains valid speech
74
+ if is_silent_audio(audio_path):
75
+ return jsonify({"error": "No speech detected. Please try again."}), 400
76
 
77
  # Force Whisper to transcribe only in English
78
  result = asr_model(audio_path, generate_kwargs={"language": "en"})
 
80
 
81
  return jsonify({"text": transcribed_text})
82
  except Exception as e:
83
+ return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
84
 
85
# Run Waitress Production Server
if __name__ == "__main__":
    # Listen on all interfaces; port 7860 is the Hugging Face Spaces default.
    serve(app, host="0.0.0.0", port=7860)