geethareddy committed
Commit caba180 · verified · 1 Parent(s): e1fbc96

Update app.py

Files changed (1):
  app.py +27 -23
app.py CHANGED
@@ -1,7 +1,6 @@
 import torch
 from flask import Flask, render_template, request, jsonify
 import os
-import re
 from transformers import pipeline
 from gtts import gTTS
 from pydub import AudioSegment
@@ -10,11 +9,11 @@ from waitress import serve
 
 app = Flask(__name__)
 
-# ✅ Load Whisper ASR Model Correctly
+# Use whisper-small for faster processing and better speed
 device = "cuda" if torch.cuda.is_available() else "cpu"
-asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3", device=0 if device == "cuda" else -1)
+asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-small", device=0 if device == "cuda" else -1)
 
-# Function to generate voice prompts
+# Function to generate audio prompts
 def generate_audio_prompt(text, filename):
     tts = gTTS(text=text, lang="en")
     tts.save(os.path.join("static", filename))
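Note: this hunk trades accuracy for speed by swapping whisper-large-v3 for whisper-small. For recordings longer than about 30 seconds, the transformers ASR pipeline can also chunk the audio; a minimal variant of the load above (chunk_length_s is a standard pipeline argument, but worth verifying against the installed transformers version):

    asr_model = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        chunk_length_s=30,  # transcribe long recordings in 30-second windows
        device=0 if device == "cuda" else -1,
    )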
@@ -22,7 +21,7 @@ def generate_audio_prompt(text, filename):
 # Generate required voice prompts
 prompts = {
     "welcome": "Welcome to Biryani Hub.",
-    "ask_name": "Tell me your full name.",
+    "ask_name": "Tell me your name.",
     "ask_email": "Please provide your email address.",
     "thank_you": "Thank you for registration."
 }
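Note: the prompt loop in the next hunk writes MP3s into static/ at import time, and gTTS's save() fails if that directory does not exist. A one-line guard before the loop (my addition, not part of this commit) keeps startup from crashing on a fresh checkout:

    os.makedirs("static", exist_ok=True)  # ensure the prompt output directory exists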
@@ -30,28 +29,33 @@ prompts = {
 for key, text in prompts.items():
     generate_audio_prompt(text, f"{key}.mp3")
 
-# ✅ Ensure Proper Audio Format (16kHz, Mono)
+# Symbol mapping for proper recognition
+SYMBOL_MAPPING = {
+    "at the rate": "@",
+    "at": "@",
+    "dot": ".",
+    "underscore": "_",
+    "hash": "#",
+    "plus": "+",
+    "dash": "-",
+    "comma": ",",
+    "space": " "
+}
+
+# Function to convert audio to WAV format
 def convert_to_wav(input_path, output_path):
     try:
         audio = AudioSegment.from_file(input_path)
-        audio = audio.set_frame_rate(16000).set_channels(1)  # ✅ Convert to 16kHz, mono
+        audio = audio.set_frame_rate(16000).set_channels(1)  # Convert to 16kHz, mono
         audio.export(output_path, format="wav")
     except Exception as e:
         raise Exception(f"Audio conversion failed: {str(e)}")
 
-# ✅ Check for Silence
+# Function to check if audio contains actual speech
 def is_silent_audio(audio_path):
     audio = AudioSegment.from_wav(audio_path)
-    nonsilent_parts = detect_nonsilent(audio, min_silence_len=500, silence_thresh=audio.dBFS-16)
-    return len(nonsilent_parts) == 0
-
-# ✅ Clean Transcription Text (Improved Formatting)
-def clean_transcription(text):
-    text = text.strip()
-    text = re.sub(r"[-.]", "", text)  # ✅ Remove unwanted characters
-    # Fix email structure and common recognition errors
-    text = text.replace(" at the rate ", "@").replace(" dot ", ".")
-    return text.capitalize()
+    nonsilent_parts = detect_nonsilent(audio, min_silence_len=500, silence_thresh=audio.dBFS-16)  # Reduced silence duration
+    return len(nonsilent_parts) == 0  # If no speech detected
 
 @app.route("/")
 def index():
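Two issues in this hunk are worth flagging. First, is_silent_audio() calls detect_nonsilent(), but neither the old nor the new version imports it, so the silence check raises NameError at runtime. The function lives in pydub's silence module:

    from pydub.silence import detect_nonsilent

Second, SYMBOL_MAPPING is defined but never referenced, so spoken symbols are not actually rewritten anywhere. If the goal is to normalize dictated email addresses, a helper along these lines could be applied to the transcript (a sketch only; apply_symbol_mapping is a hypothetical name, not in the commit). Since " at the rate " must be replaced before the bare " at ", longer phrases should come first; the dict's insertion order, which Python preserves, already guarantees that:

    def apply_symbol_mapping(text):
        # Replace spoken symbol names ("dot", "at the rate", ...) with characters.
        for spoken, symbol in SYMBOL_MAPPING.items():
            text = text.replace(f" {spoken} ", symbol)
        return text.strip()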
@@ -68,21 +72,21 @@ def transcribe():
     audio_file.save(input_audio_path)
 
     try:
-        # ✅ Convert audio to proper format
+        # Convert to WAV
         convert_to_wav(input_audio_path, output_audio_path)
 
-        # ✅ Check for silent audio
+        # Check for silence
         if is_silent_audio(output_audio_path):
             return jsonify({"error": "No speech detected. Please try again."}), 400
 
-        # ✅ Transcribe Using Whisper ASR
+        # Use Whisper ASR model for transcription
         result = asr_model(output_audio_path, generate_kwargs={"language": "en"})
-        transcribed_text = clean_transcription(result["text"])
+        transcribed_text = result["text"].strip().capitalize()
 
         return jsonify({"text": transcribed_text})
     except Exception as e:
         return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
 
-# ✅ Start Production Server
+# Start Production Server
 if __name__ == "__main__":
     serve(app, host="0.0.0.0", port=7860)
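Note: with clean_transcription() removed, the endpoint now returns result["text"].strip().capitalize(). str.capitalize() lowercases everything after the first character, and no symbol substitution happens, which is a poor fit for the dictated email addresses this app collects:

    text = "John dot Smith at the rate Gmail dot com"
    print(text.strip().capitalize())
    # -> 'John dot smith at the rate gmail dot com' (spoken symbols left as words)

Running the transcript through the apply_symbol_mapping() sketch above before returning would restore the email fix-ups the removed clean_transcription() used to provide.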
 