|
import os
import tempfile

import speech_recognition as sr
import torch
from flask import Flask, render_template, request, jsonify
from gtts import gTTS
from transformers import pipeline
|
|
|
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)

# Module-level SpeechRecognition recognizer instance.
recognizer = sr.Recognizer()

# Decide once, at import time, whether a CUDA GPU is available.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Whisper speech-to-text pipeline. The pipeline is given a device index:
# 0 when CUDA was selected above, -1 otherwise.
speech_to_text = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    device=-1 if device != "cuda" else 0,
)
|
|
|
|
|
def generate_audio(text, filename="static/output.mp3"):
    """Synthesize *text* as English speech and save it as an MP3 file.

    Args:
        text: The text to speak.
        filename: Destination path of the MP3 file. Missing parent
            directories are created before the file is written.
    """
    # Saving fails with FileNotFoundError when the destination folder is
    # missing (e.g. a fresh checkout without ``static/``), so create it first.
    target_dir = os.path.dirname(filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    tts = gTTS(text=text, lang="en")
    tts.save(filename)
|
|
|
@app.route("/")
def home():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
|
|
|
@app.route("/get_prompt")
def get_prompt():
    """Generate the spoken welcome prompt and return its URL as JSON.

    Returns:
        ``{"audio_url": ...}`` on success, or ``{"error": ...}`` with HTTP
        status 500 when generating the audio file fails.
    """
    try:
        generate_audio(
            "Welcome to Biryani Hub. Please tell me your name.",
            "static/welcome.mp3",
        )
    except Exception as e:
        # Report failures as JSON, matching the error shape /process_audio
        # uses, so the client never has to parse an HTML error page.
        return jsonify({"error": str(e)}), 500

    return jsonify({"audio_url": "/static/welcome.mp3"})
|
|
|
@app.route("/process_audio", methods=["POST"])
def process_audio():
    """Transcribe an uploaded audio clip and return the text as JSON.

    Expects a multipart upload with the clip in the ``audio`` file field.

    Returns:
        ``{"text": ...}`` on success; ``{"error": ...}`` with status 400 when
        the ``audio`` field is missing; ``{"error": ...}`` with status 500
        when saving or transcribing the clip fails.
    """
    if "audio" not in request.files:
        return jsonify({"error": "No audio file"}), 400

    audio_file = request.files["audio"]

    # Use a private per-request temp file instead of a fixed path under
    # static/: concurrent uploads no longer overwrite each other, and the
    # recording is not left behind in the publicly served static folder.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; save() opens it again

    try:
        audio_file.save(audio_path)
        text = speech_to_text(audio_path)["text"]
        return jsonify({"text": text})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup: a failed delete must not replace the response.
        try:
            os.remove(audio_path)
        except OSError:
            pass
|
|
|
if __name__ == "__main__":
    # The interactive debugger lets anyone who can reach the server execute
    # arbitrary Python, so it must not be on by default while listening on
    # every interface. Opt in for local development with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(host="0.0.0.0", port=7860, debug=debug_enabled)
|
|