lokesh341 committed on
Commit
9061ed1
·
verified ·
1 Parent(s): 0076e70

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -12
app.py CHANGED
@@ -1,22 +1,25 @@
1
  from flask import Flask, render_template, request, jsonify
2
- import torch
3
- from transformers import pipeline
4
- from gtts import gTTS
5
  import os
 
 
6
  import re
 
 
 
 
7
 
8
  app = Flask(__name__)
9
 
10
- # Load Whisper Model for English Transcription
11
  device = "cuda" if torch.cuda.is_available() else "cpu"
12
- asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-base", device=0 if device == "cuda" else -1)
13
 
14
  # Function to generate audio prompts
15
  def generate_audio_prompt(text, filename):
16
  tts = gTTS(text=text, lang="en")
17
  tts.save(os.path.join("static", filename))
18
 
19
- # Generate audio prompts
20
  prompts = {
21
  "welcome": "Welcome to Biryani Hub.",
22
  "ask_name": "Tell me your name.",
@@ -27,9 +30,36 @@ prompts = {
27
  for key, text in prompts.items():
28
  generate_audio_prompt(text, f"{key}.mp3")
29
 
30
- # Clean transcribed text to allow only English letters, numbers, and basic punctuation
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def clean_transcription(text):
32
- return re.sub(r"[^a-zA-Z0-9@.\s]", "", text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  @app.route("/")
35
  def index():
@@ -45,12 +75,16 @@ def transcribe():
45
  audio_file.save(audio_path)
46
 
47
  try:
48
- # Transcribe audio to text
49
- result = asr_model(audio_path, generate_kwargs={"language": "en"})
 
 
50
  transcribed_text = clean_transcription(result["text"])
 
51
  return jsonify({"text": transcribed_text})
52
  except Exception as e:
53
- return jsonify({"error": str(e)}), 500
54
 
 
55
  if __name__ == "__main__":
56
- app.run(host="0.0.0.0", port=5000, debug=True)
 
1
  from flask import Flask, render_template, request, jsonify
 
 
 
2
  import os
3
+ import torch
4
+ import whisper
5
  import re
6
+ from pydub import AudioSegment
7
+ from pydub.silence import detect_nonsilent
8
+ from waitress import serve
9
+ from gtts import gTTS
10
 
11
  app = Flask(__name__)
12
 
13
# Load Whisper speech-recognition model; prefer GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
# "medium" trades speed for accuracy; switch to "large" for better accuracy.
# Pass `device` explicitly — in the original code the computed `device`
# value was never used, leaving the placement to whisper's internal default.
whisper_model = whisper.load_model("medium", device=device)
16
 
17
# Helper: synthesize a spoken prompt and store it in the static folder
def generate_audio_prompt(text, filename):
    """Render *text* as English speech via gTTS and save it as static/<filename>."""
    speech = gTTS(text=text, lang="en")
    speech.save(os.path.join("static", filename))
21
 
22
+ # Generate voice prompts
23
  prompts = {
24
  "welcome": "Welcome to Biryani Hub.",
25
  "ask_name": "Tell me your name.",
 
30
  for key, text in prompts.items():
31
  generate_audio_prompt(text, f"{key}.mp3")
32
 
33
# Symbol mapping for proper recognition of spoken punctuation.
# NOTE: multi-word phrases ("at the rate") come first so they are applied
# before their shorter substrings ("at") — dicts preserve insertion order.
SYMBOL_MAPPING = {
    "at the rate": "@",
    "at": "@",
    "dot": ".",
    "underscore": "_",
    "hash": "#",
    "plus": "+",
    "dash": "-",
    "comma": ",",
    "space": " "
}

# Function to clean and format transcribed text
def clean_transcription(text):
    """Normalize a Whisper transcription for form-field use.

    Lowercases and strips *text*, converts spoken symbol names (e.g. "dot",
    "at the rate") to their characters, and capitalizes the first letter.

    Whole-word matching (\\b boundaries) is used so that words merely
    containing a mapped phrase are not corrupted — the previous
    substring-based ``str.replace`` turned "what" into "wh@" and
    "update" into "upd@e".
    """
    text = text.lower().strip()
    for phrase, symbol in SYMBOL_MAPPING.items():
        text = re.sub(rf"\b{re.escape(phrase)}\b", symbol, text)
    return text.capitalize()
52
+
53
# Helper: strip leading/trailing silence from a recorded WAV in place
def trim_silence(audio_path):
    """Detect the non-silent span of the WAV at *audio_path* and rewrite the
    file so only that span remains. If no speech is detected, the file is
    left untouched.
    """
    segment = AudioSegment.from_wav(audio_path)
    # Anything quieter than 16 dB below the clip's average loudness counts
    # as silence; gaps shorter than 500 ms are ignored.
    speech_ranges = detect_nonsilent(
        segment, min_silence_len=500, silence_thresh=segment.dBFS - 16
    )
    if not speech_ranges:
        return
    start = speech_ranges[0][0]
    end = speech_ranges[-1][1]
    segment[start:end].export(audio_path, format="wav")  # overwrite with trimmed audio
63
 
64
  @app.route("/")
65
  def index():
 
75
  audio_file.save(audio_path)
76
 
77
  try:
78
+ trim_silence(audio_path) # Remove silence before processing
79
+
80
+ # Transcribe using Whisper
81
+ result = whisper_model.transcribe(audio_path, language="english")
82
  transcribed_text = clean_transcription(result["text"])
83
+
84
  return jsonify({"text": transcribed_text})
85
  except Exception as e:
86
+ return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
87
 
88
# Run under Waitress, a production-grade WSGI server, instead of Flask's
# built-in development server.
if __name__ == "__main__":
    serve(app, host="0.0.0.0", port=7860)