"""Flask service that transcribes uploaded audio with a Whisper pipeline.

On import the module loads the ``openai/whisper-base`` speech-recognition
pipeline and writes a fixed set of spoken prompts (via gTTS) into the
``static/`` directory. It exposes two routes:

* ``GET /`` renders ``index.html``.
* ``POST /process_audio`` transcribes the uploaded ``audio`` file and
  returns the text as JSON.
"""

import os
import tempfile
import time

import speech_recognition as sr
import torch
from flask import Flask, jsonify, render_template, request
from gtts import gTTS
from transformers import pipeline

app = Flask(__name__)
recognizer = sr.Recognizer()

# Load Hugging Face Whisper model for speech-to-text. The pipeline takes a
# device index: 0 selects the first CUDA device, -1 selects the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=0 if device == "cuda" else -1,
)

# (prompt text, output path) pairs generated once at import time.
PROMPTS = (
    ("Welcome to Biryani Hub.", "static/welcome.mp3"),
    ("Tell me your name.", "static/ask_name.mp3"),
    ("Please provide your email.", "static/ask_email.mp3"),
    ("Thank you for registration.", "static/thank_you.mp3"),
)


def generate_audio(text, filename):
    """Synthesize *text* as English speech and save it to *filename*.

    Args:
        text: The sentence to speak.
        filename: Destination path of the MP3 file. Its parent directory
            is created when it does not exist yet.
    """
    directory = os.path.dirname(filename)
    if directory:
        # gTTS.save() opens the path for writing and fails if the folder
        # is missing, so make sure it exists first.
        os.makedirs(directory, exist_ok=True)
    tts = gTTS(text=text, lang="en")
    tts.save(filename)


# Generate all prompts before starting.
for _prompt_text, _prompt_path in PROMPTS:
    generate_audio(_prompt_text, _prompt_path)


@app.route("/")
def home():
    """Render the landing page template."""
    return render_template("index.html")


@app.route("/process_audio", methods=["POST"])
def process_audio():
    """Transcribe the uploaded ``audio`` file.

    Returns:
        ``{"text": ...}`` on success, ``{"error": ...}`` with status 400
        when no ``audio`` part was uploaded, or ``{"error": ...}`` with
        status 500 when saving or transcribing the upload fails.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file"}), 400

    audio_file = request.files["audio"]

    # Use a unique temporary file per request. A single shared path inside
    # static/ would let concurrent requests overwrite each other's audio
    # and would make the upload downloadable through the static route.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_file.save(audio_path)
        text = speech_to_text(audio_path)["text"]
    except Exception as e:  # request boundary: report the failure as JSON
        app.logger.exception("Failed to process uploaded audio")
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup so uploads do not accumulate on disk.
        try:
            os.remove(audio_path)
        except OSError:
            pass

    return jsonify({"text": text})


if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger
    # must not be enabled unconditionally; opt in with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)