Spaces:
Runtime error
Runtime error
File size: 2,895 Bytes
69067ae d445f81 69067ae d445f81 0197ed3 adb5e2a a3e60d6 d445f81 0197ed3 69067ae d445f81 7494646 d445f81 7494646 7467739 69067ae 7467739 d445f81 7467739 adb5e2a 7467739 685e8d2 d445f81 78a5c3d e215f5e 78a5c3d d445f81 7467739 78a5c3d d445f81 78a5c3d d445f81 a3e60d6 69067ae 7467739 69067ae 7467739 69067ae 7467739 8ab530a 69067ae 7467739 69067ae 8ab530a 7494646 d445f81 7467739 d445f81 7467739 7494646 69067ae 0197ed3 d445f81 69067ae e215f5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import os
import re
import tempfile

import speech_recognition as sr
import torch
from flask import Flask, render_template, request, jsonify
from gtts import gTTS
from pydub import AudioSegment
from pydub.silence import detect_nonsilent
from transformers import pipeline
from waitress import serve

app = Flask(__name__)

# Load the Whisper speech-to-text pipeline once at import time; use the first
# GPU when CUDA is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

asr_model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device=0 if device == "cuda" else -1,
)
# Function to generate audio prompts
def generate_audio_prompt(text, filename):
    """Synthesize *text* as English speech and save it under ``static/``.

    Args:
        text: The sentence to speak.
        filename: File name (relative to the ``static`` folder) to write the
            generated audio to.
    """
    target_path = os.path.join("static", filename)
    # The output folder may not exist yet on a fresh checkout; saving into a
    # missing directory would abort module import.
    os.makedirs(os.path.dirname(target_path), exist_ok=True)
    tts = gTTS(text=text, lang="en")
    tts.save(target_path)
# Voice prompts used by the registration flow, keyed by the base name of the
# MP3 file each one is rendered to.
prompts = {
    "welcome": "Welcome to Biryani Hub.",
    "ask_name": "Tell me your name.",
    "ask_email": "Please provide your email address.",
    "thank_you": "Thank you for registration."
}

# Render every prompt to "<key>.mp3" when the module is loaded.
for prompt_key, prompt_text in prompts.items():
    generate_audio_prompt(prompt_text, f"{prompt_key}.mp3")
# Spoken phrases and the symbols they stand for (e.g. when dictating an
# e-mail address).
SYMBOL_MAPPING = {
    "at the rate": "@",
    "at": "@",
    "dot": ".",
    "underscore": "_",
    "hash": "#",
    "plus": "+",
    "dash": "-",
    "comma": ",",
    "space": " "
}

# One alternation over every phrase. Longest phrases come first so that
# "at the rate" is preferred over the shorter "at", and the lookarounds limit
# matches to whole words so "that" or "dotted" are left untouched.
_SYMBOL_PATTERN = re.compile(
    r"(?<!\w)(?:"
    + "|".join(
        re.escape(phrase)
        for phrase in sorted(SYMBOL_MAPPING, key=len, reverse=True)
    )
    + r")(?!\w)"
)


# Function to clean and format transcribed text properly
def clean_transcription(text):
    """Normalize transcribed text and turn spoken symbol names into symbols.

    The text is lower-cased, runs of whitespace are collapsed to a single
    space, surrounding whitespace is stripped, and every whole-word occurrence
    of a ``SYMBOL_MAPPING`` phrase is replaced by its symbol. Phrases that
    merely appear inside a longer word are not replaced.

    Args:
        text: Raw transcription text.

    Returns:
        The cleaned text with only its first character upper-cased.
    """
    text = re.sub(r"\s+", " ", text.lower()).strip()
    text = _SYMBOL_PATTERN.sub(
        lambda match: SYMBOL_MAPPING[match.group(0)], text
    )
    return text.capitalize()
# Function to detect speech duration and avoid cutting words
def trim_silence(audio_path):
    """Trim leading and trailing silence from a WAV file, in place.

    Non-silent ranges are detected with a minimum silence length of 700 ms
    and a threshold 16 dB below the clip's own dBFS level. The audio between
    the start of the first range and the end of the last range is written
    back to ``audio_path``. If no non-silent range is found the file is left
    unchanged.

    Args:
        audio_path: Path of the WAV file to trim; it is overwritten.
    """
    audio = AudioSegment.from_wav(audio_path)
    nonsilent_parts = detect_nonsilent(
        audio,
        min_silence_len=700,
        silence_thresh=audio.dBFS - 16,
    )
    if not nonsilent_parts:
        return

    start_trim = nonsilent_parts[0][0]
    end_trim = nonsilent_parts[-1][1]
    trimmed_audio = audio[start_trim:end_trim]
    # export() hands back the open output file; close it explicitly instead
    # of relying on garbage collection to release the handle.
    exported = trimmed_audio.export(audio_path, format="wav")
    exported.close()
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio clip and return the cleaned text as JSON.

    Expects a multipart upload with the file in the ``audio`` field.

    Returns:
        ``{"text": ...}`` on success, ``{"error": ...}`` with status 400 when
        no ``audio`` file was sent, or ``{"error": ...}`` with status 500 when
        saving, trimming or transcription raises.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files["audio"]
    # Use a unique file per request: the server handles requests on several
    # threads, and a shared fixed path would let concurrent uploads overwrite
    # each other. Keeping it out of the static folder also avoids serving
    # users' recordings publicly.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_file.save(audio_path)
        trim_silence(audio_path)  # Trim silence before processing
        # Force Whisper to transcribe only in English
        result = asr_model(audio_path, generate_kwargs={"language": "en"})
        transcribed_text = clean_transcription(result["text"])
        return jsonify({"text": transcribed_text})
    except Exception as e:
        # Request boundary: report any processing failure as a JSON error.
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup of the per-request temporary file.
        try:
            os.remove(audio_path)
        except OSError:
            pass
# Script entry point: serve the app with Waitress on all interfaces, port 7860.
if __name__ == "__main__":
    serve(app, port=7860, host="0.0.0.0")
|