Update app.py
Browse files
app.py
CHANGED
@@ -1,61 +1,57 @@
|
|
|
|
1 |
import gradio as gr
|
2 |
-
from
|
|
|
|
|
|
|
|
|
3 |
import torch
|
4 |
|
5 |
-
#
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
model_name,
|
10 |
-
device_map="auto",
|
11 |
-
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
|
12 |
)
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
max_new_tokens=max_tokens,
|
33 |
-
temperature=temperature,
|
34 |
-
top_p=top_p,
|
35 |
-
do_sample=True,
|
36 |
-
)[0]['generated_text']
|
37 |
-
|
38 |
-
assistant_response = response.replace(prompt, "").strip()
|
39 |
-
history.append((message, assistant_response))
|
40 |
-
return assistant_response, history
|
41 |
-
|
42 |
-
demo = gr.ChatInterface(
|
43 |
-
fn=respond,
|
44 |
-
additional_inputs=[
|
45 |
-
gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
|
46 |
-
gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
|
47 |
-
gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.01, label="Temperature"),
|
48 |
-
gr.Slider(
|
49 |
-
minimum=0.1,
|
50 |
-
maximum=1.0,
|
51 |
-
value=0.95,
|
52 |
-
step=0.01,
|
53 |
-
label="Top-p (nucleus sampling)",
|
54 |
-
),
|
55 |
-
],
|
56 |
-
title="Chat with LLaMA 2",
|
57 |
-
description="A chat interface using LLaMA 2 model locally via Transformers.",
|
58 |
)
|
59 |
|
60 |
-
|
61 |
-
demo.launch()
|
|
|
1 |
+
from openai import OpenAI
|
2 |
import gradio as gr
|
3 |
+
from gtts import gTTS
|
4 |
+
import tempfile
|
5 |
+
import librosa
|
6 |
+
import numpy as np
|
7 |
+
from transformers import WhisperProcessor, WhisperForConditionalGeneration
|
8 |
import torch
|
9 |
|
10 |
+
# OpenRouter API Setup
|
11 |
+
# OpenRouter client setup.
#
# The API key is read from the environment rather than written into the
# source, so the secret never ends up in version control.
# NOTE(review): any key that was previously committed in this file should be
# treated as compromised and revoked in the OpenRouter dashboard.
import os  # kept next to its only use so this setup block is self-contained

_openrouter_api_key = os.environ.get("OPENROUTER_API_KEY")
if not _openrouter_api_key:
    # Fail loudly here: passing api_key=None would let the OpenAI client fall
    # back to other credentials from the environment and send them to
    # OpenRouter instead.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it (or add it as a Space secret) "
        "before starting the app."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_openrouter_api_key,
)
|
15 |
|
16 |
+
# Whisper checkpoint used for speech-to-text.
_WHISPER_MODEL_ID = "openai/whisper-large-v3-turbo"

# Lazily filled with the (processor, model) pair so the checkpoint is loaded
# once per process instead of on every request.
_whisper_cache = {}


def _load_whisper():
    """Return the Whisper ``(processor, model)`` pair, loading it on first use."""
    if "pair" not in _whisper_cache:
        _whisper_cache["pair"] = (
            WhisperProcessor.from_pretrained(_WHISPER_MODEL_ID),
            WhisperForConditionalGeneration.from_pretrained(_WHISPER_MODEL_ID),
        )
    return _whisper_cache["pair"]


def _transcribe(audio_filepath):
    """Return the Whisper transcription of the audio file at *audio_filepath*."""
    processor, model = _load_whisper()
    # sr=16000 asks librosa to resample on load; the same rate is then passed
    # to the processor so both sides agree.
    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)
    input_features = processor(
        audio_data, sampling_rate=sample_rate, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]


def _synthesize_speech(text):
    """Write *text* as English speech to a temporary MP3 and return its path."""
    # Only a unique path is needed from NamedTemporaryFile. The handle is
    # closed before gTTS writes to that path so no file descriptor is leaked.
    # delete=False keeps the file on disk for the caller to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        audio_path = temp_audio.name
    gTTS(text, lang="en").save(audio_path)
    return audio_path


def voice_assistant(audio_filepath):
    """Answer a spoken question with text and synthesized speech.

    The recording is transcribed with Whisper, the transcript is sent as a
    single user message to the OpenRouter chat model, and the reply is
    converted to speech with gTTS.

    Args:
        audio_filepath: Path to the recorded audio file, or ``None`` when
            nothing has been recorded.

    Returns:
        A ``(text, audio_path)`` tuple. ``text`` is the model's reply, or a
        short message when there is nothing to answer. ``audio_path`` is the
        path of the generated MP3, or ``None`` when no speech was produced.
    """
    if audio_filepath is None:
        return "Please record your question.", None

    user_voice = _transcribe(audio_filepath)
    if not user_voice.strip():
        # Nothing intelligible was transcribed; do not spend an API call on
        # an empty prompt.
        return "Sorry, I could not understand the audio. Please try again.", None

    # Generate the AI response using OpenRouter.
    completion = client.chat.completions.create(
        model="deepseek/deepseek-chat:free",
        messages=[{"role": "user", "content": user_voice}],
    )
    ai_response = completion.choices[0].message.content
    if not ai_response or not ai_response.strip():
        # The reply may be missing or blank, and gTTS refuses empty text.
        return "The assistant returned an empty response.", None

    return ai_response, _synthesize_speech(ai_response)
|
46 |
|
47 |
+
# Gradio Interface
|
48 |
+
# Gradio UI: one microphone input, with the assistant's reply shown as text
# and played back as audio.
iface = gr.Interface(
    fn=voice_assistant,
    # type="filepath" hands voice_assistant a path to the recording, which is
    # what it passes on to librosa.load.
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Speak Your Question"),
    # Order matches voice_assistant's (text, audio_path) return value.
    outputs=[gr.Textbox(label="AI Response"), gr.Audio(label="Voice Response")],
    title="AI Voice Assistant",
    # The only input is the microphone, so the description no longer
    # advertises typed questions.
    description="Speak a question, and the AI will respond with voice output.",
    # NOTE(review): live=True is expected to re-run fn whenever the input
    # changes (no Submit click) - confirm against the installed Gradio version.
    live=True,
)

iface.launch()
|
|