Spaces:
Runtime error
Runtime error
File size: 2,064 Bytes
69067ae 0197ed3 adb5e2a 7467739 a3e60d6 0197ed3 69067ae 7467739 7494646 7467739 7494646 7467739 69067ae 7467739 adb5e2a 7467739 685e8d2 78a5c3d 7467739 78a5c3d a3e60d6 69067ae 7467739 69067ae 7467739 69067ae 7467739 8ab530a 69067ae 7467739 69067ae 8ab530a 7494646 78a5c3d 7467739 7494646 69067ae 0197ed3 69067ae 7467739 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import os
import re
import tempfile

import torch
from flask import Flask, render_template, request, jsonify
from gtts import gTTS
from transformers import pipeline
app = Flask(__name__)

# Whisper speech-to-text pipeline; runs on the first GPU when CUDA is
# available and on the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

asr_model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    # The pipeline takes a device index: 0 for the first GPU, -1 for CPU.
    device={"cuda": 0, "cpu": -1}[device],
)
def generate_audio_prompt(text, filename):
    """Synthesize *text* as English speech and save it under ``static/``.

    Args:
        text: The sentence to be spoken.
        filename: File name (e.g. ``"welcome.mp3"``) to write inside the
            ``static`` directory.
    """
    # Make sure the target directory exists so saving does not fail on a
    # fresh checkout where ``static/`` has not been created yet.
    os.makedirs("static", exist_ok=True)
    tts = gTTS(text=text, lang="en")
    tts.save(os.path.join("static", filename))
# Spoken prompts, keyed by the base name of the audio file generated for each.
prompts = dict(
    welcome="Welcome to Biryani Hub.",
    ask_name="Tell me your name.",
    ask_email="Please provide your email address.",
    thank_you="Thank you for registration.",
)

# Render every prompt to "<key>.mp3" when the module is loaded.
for prompt_key, prompt_text in prompts.items():
    generate_audio_prompt(prompt_text, prompt_key + ".mp3")
# Spoken phrases mapped to the symbol they stand for.
SYMBOL_MAPPING = {
    "at the rate": "@",
    "dot": ".",
    "underscore": "_",
    "hash": "#",
    "plus": "+",
    "dash": "-",
    "comma": ",",
    "space": " ",
}

# One alternation over every phrase, anchored on word boundaries so a phrase
# is only replaced when it was spoken as a whole word (otherwise "command"
# would become ",nd" and "dashboard" would become "-board"). Longer phrases
# come first so a multi-word phrase always wins over a shorter one.
# Built once from SYMBOL_MAPPING as it is defined above.
_SYMBOL_PATTERN = re.compile(
    r"\b(?:"
    + "|".join(
        re.escape(phrase)
        for phrase in sorted(SYMBOL_MAPPING, key=len, reverse=True)
    )
    + r")\b"
)


def clean_transcription(text):
    """Lower-case *text* and replace spoken symbol names with the symbols.

    Only whole-word occurrences of the phrases in ``SYMBOL_MAPPING`` are
    replaced; whitespace around a replaced phrase is left as it was.

    Args:
        text: Raw transcription string.

    Returns:
        The lower-cased string with spoken symbol names substituted.
    """
    return _SYMBOL_PATTERN.sub(
        lambda match: SYMBOL_MAPPING[match.group(0)],
        text.lower(),
    )
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio file and return the cleaned text as JSON.

    Expects a multipart form upload with the file in the ``audio`` field.

    Returns:
        ``{"text": ...}`` on success, ``{"error": ...}`` with status 400 when
        no ``audio`` part was sent, or ``{"error": ...}`` with status 500 when
        saving or transcribing the upload raises.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files["audio"]

    # Each request gets its own scratch directory: concurrent uploads cannot
    # overwrite one another, the upload is never exposed through the public
    # static folder, and the file is removed when the block exits.
    with tempfile.TemporaryDirectory() as scratch_dir:
        audio_path = os.path.join(scratch_dir, "upload.wav")
        try:
            audio_file.save(audio_path)
            # Transcribe audio to text with Whisper
            result = asr_model(audio_path, generate_kwargs={"language": "en"})
            transcribed_text = clean_transcription(result["text"])
            return jsonify({"text": transcribed_text})
        except Exception as e:
            # Keep the traceback in the server log; the client only receives
            # the exception message.
            app.logger.exception("Transcription failed")
            return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server execute
    # code, so it must not be on by default while listening on all
    # interfaces. Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    # PORT lets the hosting platform choose the port; the default is 5000.
    port = int(os.environ.get("PORT", "5000"))
    app.run(host="0.0.0.0", port=port, debug=debug_enabled)
|