Spaces:

wifix199
/

LumiVoice

Running

App Files Files Community

wifix199 committed 15 days ago

Commit

896d8a0

verified ·

1 Parent(s): 5e799fc

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -53

app.py CHANGED Viewed

@@ -1,61 +1,57 @@
 import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 import torch
-# Load the model and tokenizer
-model_name = "meta-llama/Llama-3.2-1B"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    device_map="auto",
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
 )
-# Initialize the pipeline
-generator = pipeline(
-    "text-generation",
-    model=model,
-    tokenizer=tokenizer,
-    device_map="auto",
-    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-    max_new_tokens=512,
-)
-def respond(message, history, system_message, max_tokens, temperature, top_p):
-    prompt = f"{system_message}\n"
-    for user_msg, assistant_msg in history:
-        prompt += f"User: {user_msg}\nAssistant: {assistant_msg}\n"
-    prompt += f"User: {message}\nAssistant:"
-    response = generator(
-        prompt,
-        max_new_tokens=max_tokens,
-        temperature=temperature,
-        top_p=top_p,
-        do_sample=True,
-    )[0]['generated_text']
-    assistant_response = response.replace(prompt, "").strip()
-    history.append((message, assistant_response))
-    return assistant_response, history
-demo = gr.ChatInterface(
-    fn=respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.01, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.01,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-    title="Chat with LLaMA 2",
-    description="A chat interface using LLaMA 2 model locally via Transformers.",
 )
-if __name__ == "__main__":
-    demo.launch()

+from openai import OpenAI
 import gradio as gr
+from gtts import gTTS
+import tempfile
+import librosa
+import numpy as np
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
 import torch
+# OpenRouter API setup.
+# The API key is read from the OPENROUTER_API_KEY environment variable (for
+# example a Hugging Face Space secret) instead of being hard-coded: this file
+# is publicly visible, so any key committed here is leaked and must be revoked.
+import os
+OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
+if not OPENROUTER_API_KEY:
+    # Fail at startup with an actionable message rather than on the first request.
+    raise RuntimeError(
+        "OPENROUTER_API_KEY is not set; add it as an environment variable or Space secret."
+    )
+client = OpenAI(
+    base_url="https://openrouter.ai/api/v1",
+    api_key=OPENROUTER_API_KEY,
 )
+# Whisper checkpoint used for speech-to-text.
+_WHISPER_MODEL_ID = "openai/whisper-large-v3-turbo"
+# Filled on first use so the checkpoint is loaded once per process instead of
+# on every request.
+_whisper_cache = {}
+def _get_whisper():
+    """Return the (processor, model) pair, loading both on the first call."""
+    if "pair" not in _whisper_cache:
+        processor = WhisperProcessor.from_pretrained(_WHISPER_MODEL_ID)
+        model = WhisperForConditionalGeneration.from_pretrained(_WHISPER_MODEL_ID)
+        _whisper_cache["pair"] = (processor, model)
+    return _whisper_cache["pair"]
+def voice_assistant(audio_filepath):
+    """Transcribe a recorded question, ask the chat model, and speak the answer.
+
+    Args:
+        audio_filepath: Path to the recorded audio file, or None when nothing
+            has been recorded yet.
+
+    Returns:
+        A ``(text, audio_path)`` tuple: the text to display and the path of an
+        MP3 file containing the spoken answer. ``audio_path`` is None when no
+        speech could be produced (no recording, empty transcription, or an
+        empty model reply).
+    """
+    if audio_filepath is None:
+        return "Please record your question.", None
+    processor, model = _get_whisper()
+    # Load the recording resampled to 16 kHz, the rate passed on to the processor.
+    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)
+    # Convert audio to text using Whisper
+    input_features = processor(audio_data, sampling_rate=sample_rate, return_tensors="pt").input_features
+    predicted_ids = model.generate(input_features)
+    user_voice = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+    if not user_voice.strip():
+        # Nothing was recognised; do not send an empty prompt to the chat API.
+        return "Sorry, I could not understand the audio. Please try again.", None
+    # Generate AI response using OpenRouter
+    completion = client.chat.completions.create(
+        model="deepseek/deepseek-chat:free",
+        messages=[{"role": "user", "content": user_voice}]
+    )
+    ai_response = completion.choices[0].message.content
+    if not ai_response:
+        # The message content may be missing or empty; there is nothing to
+        # synthesize in that case (gTTS presumably rejects empty text -- confirm).
+        return "The AI returned an empty response.", None
+    # Convert AI response to speech using gTTS
+    tts = gTTS(ai_response, lang="en")
+    # Reserve a unique .mp3 path and close the handle before gTTS writes to it,
+    # so no file descriptor is leaked. delete=False keeps the file on disk for
+    # Gradio to serve after this function returns.
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
+        audio_path = temp_audio.name
+    tts.save(audio_path)
+    return ai_response, audio_path
+# Gradio interface: microphone audio in, response text and synthesized speech out.
+iface = gr.Interface(
+    fn=voice_assistant,
+    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Speak Your Question"),
+    outputs=[gr.Textbox(label="AI Response"), gr.Audio(label="Voice Response")],
+    title="AI Voice Assistant",
+    # The only input component is the microphone, so the description must not
+    # promise typed input.
+    description="Speak a question, and the AI will respond with voice output.",
+    live=True
 )
+# Start the web app when this script is run.
+iface.launch()