wifix199 committed on
Commit
896d8a0
·
verified ·
1 Parent(s): 5e799fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -53
app.py CHANGED
@@ -1,61 +1,57 @@
 
1
  import gradio as gr
2
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 
 
 
3
  import torch
4
 
5
# Checkpoint shared by the tokenizer and the causal language model.
model_name = "meta-llama/Llama-3.2-1B"

# Half precision when CUDA is available, full precision otherwise.
_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

# Placement/precision options passed to both the model load and the pipeline.
_placement = {"device_map": "auto", "torch_dtype": _dtype}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **_placement)

# Text-generation pipeline wrapping the already-loaded model and tokenizer;
# generates up to 512 new tokens unless a call overrides it.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    **_placement,
)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply for one chat turn.

    The prompt is the system message followed by every previous
    ``User:``/``Assistant:`` exchange and the new user message, ending with
    an open ``Assistant:`` cue for the model to continue.

    Args:
        message: The new user message.
        history: Iterable of ``(user_msg, assistant_msg)`` pairs; the new
            exchange is appended to it in place.
        system_message: Text placed at the very top of the prompt.
        max_tokens: Passed to the generator as ``max_new_tokens``.
        temperature: Sampling temperature passed to the generator.
        top_p: Nucleus-sampling threshold passed to the generator.

    Returns:
        A ``(assistant_response, history)`` tuple.
    """
    # Build the prompt in one join instead of repeated string concatenation.
    turns = [f"{system_message}\n"]
    turns.extend(
        f"User: {user_msg}\nAssistant: {assistant_msg}\n"
        for user_msg, assistant_msg in history
    )
    turns.append(f"User: {message}\nAssistant:")
    prompt = "".join(turns)

    response = generator(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )[0]["generated_text"]

    # Drop only the echoed prompt at the start of the output. A blanket
    # str.replace would also delete any later repetition of the prompt text
    # from the reply itself.
    if response.startswith(prompt):
        response = response[len(prompt):]
    assistant_response = response.strip()

    history.append((message, assistant_response))
    # NOTE(review): this function is passed to gr.ChatInterface as ``fn``.
    # Gradio's documentation describes that callback as returning the reply
    # itself; confirm whether returning (reply, history) is intended.
    return assistant_response, history
41
-
42
# Chat UI around `respond`; the additional inputs are passed to it, in order,
# as system_message, max_tokens, temperature and top_p.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=1024, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.01, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.01,
            label="Top-p (nucleus sampling)",
        ),
    ],
    # The checkpoint loaded by this file is meta-llama/Llama-3.2-1B, so the
    # user-facing text names that model rather than "LLaMA 2".
    title="Chat with Llama 3.2",
    description="A chat interface using the Llama 3.2 1B model locally via Transformers.",
)

if __name__ == "__main__":
    demo.launch()
 
1
import functools
import os
import tempfile

import gradio as gr
import librosa
import numpy as np
import torch
from gtts import gTTS
from openai import OpenAI
from transformers import WhisperProcessor, WhisperForConditionalGeneration
9
 
10
# OpenRouter API setup.
# The key is read from the environment so that no secret is stored in the
# source file. Any key that was previously committed here should be treated
# as leaked and revoked.
_openrouter_api_key = os.environ.get("OPENROUTER_API_KEY")
if not _openrouter_api_key:
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; define it as an environment variable "
        "(or as a secret in the hosting environment) before starting the app."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_openrouter_api_key,
)
15
 
16
@functools.lru_cache(maxsize=1)
def _load_whisper():
    """Load the Whisper processor and model once and reuse them afterwards."""
    model_id = "openai/whisper-large-v3-turbo"
    processor = WhisperProcessor.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(model_id)
    return processor, model


def voice_assistant(audio_filepath):
    """Answer a spoken question with text and a synthesized voice reply.

    The recording is transcribed with Whisper, the transcript is sent as a
    single user message to the chat-completions endpoint of ``client``, and
    the reply is converted to an MP3 file with gTTS.

    Args:
        audio_filepath: Path to the recorded audio file, or ``None``/empty
            when nothing has been recorded.

    Returns:
        A ``(text, audio_path)`` tuple. ``audio_path`` is the path of the
        generated MP3 file, or ``None`` when no speech was produced.
    """
    if not audio_filepath:
        return "Please record your question.", None

    # Cached: the processor and model are not reloaded on every request.
    processor, model = _load_whisper()

    # Resample to 16 kHz while loading and pass that rate to the processor.
    audio_data, sample_rate = librosa.load(audio_filepath, sr=16000)

    # Speech-to-text with Whisper.
    input_features = processor(
        audio_data, sampling_rate=sample_rate, return_tensors="pt"
    ).input_features
    predicted_ids = model.generate(input_features)
    user_voice = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

    if not user_voice.strip():
        # Nothing was transcribed, so there is no question to send.
        return "Sorry, I could not understand the audio. Please try again.", None

    # Generate the AI response using OpenRouter.
    completion = client.chat.completions.create(
        model="deepseek/deepseek-chat:free",
        messages=[{"role": "user", "content": user_voice}],
    )
    ai_response = completion.choices[0].message.content

    if not ai_response or not ai_response.strip():
        # There is nothing to synthesize, so skip text-to-speech.
        return "The assistant returned an empty response.", None

    # Reserve a temp file name and close the handle before gTTS writes to the
    # path; delete=False keeps the file on disk for the caller to serve.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        audio_path = temp_audio.name
    gTTS(ai_response, lang="en").save(audio_path)

    return ai_response, audio_path
46
 
47
# Gradio interface: one microphone input, with a text reply and a voice reply
# as outputs.
iface = gr.Interface(
    fn=voice_assistant,
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Speak Your Question"),
    outputs=[gr.Textbox(label="AI Response"), gr.Audio(label="Voice Response")],
    title="AI Voice Assistant",
    # The only input is the microphone, so the description no longer
    # advertises typed questions.
    description="Speak a question, and the AI will respond with text and voice output.",
    live=True,
)

# Launch only when run as a script, so importing the module has no side effect.
if __name__ == "__main__":
    iface.launch()