Spaces:

geethareddy
/

boltvoice

Runtime error

App Files Files Community

geethareddy committed on Feb 1

Commit

4b54307

verified ·

1 Parent(s): f95dae2

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -11

app.py CHANGED Viewed

@@ -1,25 +1,26 @@
 from flask import Flask, render_template, request, jsonify
 import os
 import torch
 from transformers import pipeline
 from gtts import gTTS
-import re
 from pydub import AudioSegment
 from pydub.silence import detect_nonsilent
 from waitress import serve
 app = Flask(__name__)
-# Load Whisper Model (Use whisper-small for better performance)
 device = "cuda" if torch.cuda.is_available() else "cpu"
-asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
 # Function to generate audio prompts
 def generate_audio_prompt(text, filename):
     tts = gTTS(text=text, lang="en")
     tts.save(os.path.join("static", filename))
-# Generate all required voice prompts
 prompts = {
     "welcome": "Welcome to Biryani Hub.",
     "ask_name": "Tell me your name.",
@@ -30,7 +31,7 @@ prompts = {
 for key, text in prompts.items():
     generate_audio_prompt(text, f"{key}.mp3")
-# Symbol mapping for proper recognition
 SYMBOL_MAPPING = {
     "at the rate": "@",
     "at": "@",
@@ -43,17 +44,25 @@ SYMBOL_MAPPING = {
     "space": " "
 }
 # Function to clean and format transcribed text properly
 def clean_transcription(text):
     text = text.lower().strip()
     for word, symbol in SYMBOL_MAPPING.items():
         text = text.replace(word, symbol)
-    return text.capitalize()
 # Function to check if the audio contains actual speech
 def is_silent_audio(audio_path):
     audio = AudioSegment.from_wav(audio_path)
-    nonsilent_parts = detect_nonsilent(audio, min_silence_len=700, silence_thresh=audio.dBFS-16)
     return len(nonsilent_parts) == 0  # Returns True if silence detected
 @app.route("/")
@@ -70,18 +79,18 @@ def transcribe():
     audio_file.save(audio_path)
     try:
-        # Check if audio contains valid speech
         if is_silent_audio(audio_path):
             return jsonify({"error": "No speech detected. Please try again."}), 400
-        # Force Whisper to transcribe only in English
-        result = asr_model(audio_path, generate_kwargs={"language": "en"})
         transcribed_text = clean_transcription(result["text"])
         return jsonify({"text": transcribed_text})
     except Exception as e:
         return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
-# Run Waitress Production Server
 if __name__ == "__main__":
     serve(app, host="0.0.0.0", port=7860)

 from flask import Flask, render_template, request, jsonify
 import os
 import torch
+import re
 from transformers import pipeline
 from gtts import gTTS
 from pydub import AudioSegment
 from pydub.silence import detect_nonsilent
 from waitress import serve
+import whisper_timestamped  # Improved Whisper with timestamps
 app = Flask(__name__)
+# Load Whisper Model for Highly Accurate Speech-to-Text
 device = "cuda" if torch.cuda.is_available() else "cpu"
+asr_model = whisper_timestamped.load_model("medium", device=device)
 # Function to generate audio prompts
 def generate_audio_prompt(text, filename):
     """Synthesize *text* as English speech and save it under ``static/``.

     Args:
         text: The sentence to be spoken.
         filename: Name of the audio file (for example ``"welcome.mp3"``)
             to create inside the ``static`` directory.
     """
     # Make sure the output directory exists before saving; on a fresh
     # checkout or container "static" may not have been created yet, and
     # writing into a missing directory raises FileNotFoundError.
     os.makedirs("static", exist_ok=True)
     tts = gTTS(text=text, lang="en")
     tts.save(os.path.join("static", filename))
+# Generate required voice prompts
 prompts = {
     "welcome": "Welcome to Biryani Hub.",
     "ask_name": "Tell me your name.",
 for key, text in prompts.items():
     generate_audio_prompt(text, f"{key}.mp3")
+# Symbol mapping for better recognition
 SYMBOL_MAPPING = {
     "at the rate": "@",
     "at": "@",
     "space": " "
 }
# Function to extract meaningful words (Removes unnecessary phrases)
# Filler phrases are matched as whole words only, so names that merely
# contain one of them (e.g. "username", "this island") are left intact.
# Longer phrases come first so "my name is" wins over the bare "name".
_FILLER_PHRASE_RE = re.compile(
    r"(?<!\w)(?:my name is|this is|i am|it's|name)(?!\w)"
)


def extract_meaningful_text(text):
    """Strip introductory filler phrases from *text* and capitalize it.

    The text is lower-cased, every whole-word occurrence of "my name is",
    "this is", "i am", "it's" or "name" is removed, surrounding whitespace
    is trimmed, and the first character is upper-cased.

    Args:
        text: Transcribed (and possibly symbol-mapped) user speech.

    Returns:
        The cleaned string; empty if nothing but filler was spoken.
    """
    lowered = text.lower().strip()
    # Whitespace inside the remaining text is kept as-is because callers
    # may have inserted spaces on purpose (e.g. the spoken word "space").
    stripped = _FILLER_PHRASE_RE.sub("", lowered).strip()
    return stripped.capitalize()
 # Function to clean and format transcribed text properly
 def clean_transcription(text):
     """Normalize raw speech-to-text output into the value the user meant.

     The text is lower-cased and trimmed, spoken symbol names listed in
     ``SYMBOL_MAPPING`` are replaced by their symbols, and the result is
     passed through ``extract_meaningful_text``.

     Args:
         text: Raw transcription returned by the speech recognizer.

     Returns:
         The cleaned string produced by ``extract_meaningful_text``.
     """
     text = text.lower().strip()
     # Longest phrases first so "at the rate" is preferred over its prefix
     # "at" when both could match at the same position.
     phrases = sorted(SYMBOL_MAPPING, key=len, reverse=True)
     if phrases:
         # Match whole words only: a plain substring replace would turn
         # "kate" into "k@e" because it contains "at".
         pattern = re.compile(
             r"(?<!\w)(?:" + "|".join(map(re.escape, phrases)) + r")(?!\w)"
         )
         # Single pass, so an already-substituted symbol is never re-scanned.
         text = pattern.sub(lambda match: SYMBOL_MAPPING[match.group(0)], text)
     return extract_meaningful_text(text)
 # Function to check if the audio contains actual speech
 def is_silent_audio(audio_path):
     """Report whether the WAV file at *audio_path* has no audible section.

     Args:
         audio_path: Path to a WAV file readable by pydub.

     Returns:
         True if no non-silent range is found (or the clip is empty or
         pure digital silence), False otherwise.
     """
     audio = AudioSegment.from_wav(audio_path)
     # An empty clip or pure digital silence has nothing to transcribe.
     # NOTE(review): pydub appears to report clips shorter than
     # min_silence_len as entirely non-silent, so without this guard a very
     # short silent upload would be treated as speech - confirm against
     # pydub.silence.detect_nonsilent.
     if len(audio) == 0 or audio.dBFS == float("-inf"):
         return True
     # The threshold is relative to the clip's own average loudness, so a
     # uniformly quiet recording is presumably not flagged as silent.
     nonsilent_parts = detect_nonsilent(
         audio, min_silence_len=500, silence_thresh=audio.dBFS - 16
     )
     return not nonsilent_parts  # True if silence detected
 @app.route("/")
     audio_file.save(audio_path)
     try:
+        # Check if the audio contains valid speech
         if is_silent_audio(audio_path):
             return jsonify({"error": "No speech detected. Please try again."}), 400
+        # Transcribe using Whisper
+        result = asr_model.transcribe(audio_path, language="en")
         transcribed_text = clean_transcription(result["text"])
         return jsonify({"text": transcribed_text})
     except Exception as e:
         return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
+# Use Waitress for Production Server
 if __name__ == "__main__":
     serve(app, host="0.0.0.0", port=7860)