"""Flask voice front end for Biryani Hub.

Serves a spoken welcome prompt (synthesized with gTTS) and transcribes
uploaded audio with the Hugging Face Whisper pipeline.
"""
import contextlib
import os
import tempfile

import speech_recognition as sr
import torch
from flask import Flask, jsonify, render_template, request
from gtts import gTTS
from transformers import pipeline

app = Flask(__name__)
recognizer = sr.Recognizer()

# Load Hugging Face Whisper Model
device = "cuda" if torch.cuda.is_available() else "cpu"
speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=0 if device == "cuda" else -1,
)

WELCOME_TEXT = "Welcome to Biryani Hub. Please tell me your name."
WELCOME_FILENAME = "welcome.mp3"


# Function to convert text to speech
def generate_audio(text, filename="static/output.mp3"):
    """Synthesize *text* as English speech and save it as an MP3.

    Args:
        text: The sentence(s) to speak.
        filename: Destination path of the MP3 file. Its parent directory is
            created if it does not exist yet.
    """
    directory = os.path.dirname(filename)
    if directory:
        # gTTS opens the target file directly and fails on a missing folder.
        os.makedirs(directory, exist_ok=True)
    tts = gTTS(text=text, lang="en")
    tts.save(filename)


@app.route("/")
def home():
    """Render the landing page."""
    return render_template("index.html")


@app.route("/get_prompt")
def get_prompt():
    """Return the URL of the spoken welcome prompt as JSON.

    The prompt text is constant, so the MP3 is synthesized only when it is
    not already present in the static folder. On synthesis failure a JSON
    error with status 500 is returned.
    """
    # Write where Flask actually serves static files from, independent of
    # the process's current working directory.
    welcome_path = os.path.join(app.static_folder, WELCOME_FILENAME)
    if not os.path.exists(welcome_path):
        try:
            generate_audio(WELCOME_TEXT, welcome_path)
        except Exception as e:
            app.logger.exception("Failed to generate the welcome prompt")
            # Do not leave a partial file behind that later requests would
            # mistake for a finished prompt.
            with contextlib.suppress(OSError):
                os.remove(welcome_path)
            return jsonify({"error": str(e)}), 500
    return jsonify({"audio_url": "/static/welcome.mp3"})


@app.route("/process_audio", methods=["POST"])
def process_audio():
    """Transcribe the uploaded ``audio`` file part and return its text.

    Returns:
        ``{"text": ...}`` on success, ``{"error": ...}`` with status 400 when
        no audio file was uploaded, or with status 500 when saving or
        transcription fails.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file"}), 400

    audio_file = request.files["audio"]
    if not audio_file.filename:
        # A form submitted without selecting a file sends an empty part.
        return jsonify({"error": "No audio file"}), 400

    # Each request gets its own private temp file: a shared fixed path would
    # let concurrent uploads overwrite each other, and keeping it out of the
    # static folder avoids publishing user recordings.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        audio_file.save(audio_path)
        text = speech_to_text(audio_path)["text"]
        return jsonify({"text": text})
    except Exception as e:
        app.logger.exception("Audio transcription failed")
        return jsonify({"error": str(e)}), 500
    finally:
        with contextlib.suppress(OSError):
            os.remove(audio_path)


if __name__ == "__main__":
    # The Werkzeug debugger allows arbitrary code execution, so it must not
    # be enabled by default on a server bound to all interfaces. Opt in
    # explicitly with FLASK_DEBUG=1 during local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)