Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files
.env
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
MY_API_KEY=AIzaSyDhrrlu4Eh5WbDuO3u72jX9wkolqEKmkYU
|
app.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import google.generativeai as genai
|
3 |
+
import speech_recognition as sr
|
4 |
+
import pyttsx3
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import gradio as gr
|
7 |
+
import tempfile
|
8 |
+
|
9 |
+
# Read key/value pairs from a local .env file into the process environment
# so that os.getenv() can see them later in this module.
load_dotenv()

# One pyttsx3 engine is created at import time and shared by speak_and_save().
engine = pyttsx3.init()
|
14 |
+
|
15 |
+
def speak_and_save(text):
    """Render *text* to a temporary WAV file and return that file's path.

    A uniquely named ``.wav`` file is reserved with ``delete=False`` so it
    outlives this call; the module-level pyttsx3 ``engine`` then writes the
    synthesized speech into it. Removing the file is left to the caller.
    """
    placeholder = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    audio_path = placeholder.name
    placeholder.close()

    # save_to_file only queues the job; runAndWait processes the queue.
    engine.save_to_file(text, audio_path)
    engine.runAndWait()
    return audio_path
|
22 |
+
|
23 |
+
def recognize_speech_from_audio(audio_file):
    """Transcribe the speech contained in an audio file.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile`` (the caller in
            this file passes a file path).

    Returns:
        The text returned by ``Recognizer.recognize_google``, or ``None``
        when the audio could not be understood or the recognition request
        failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        # Read the whole file into memory before recognition.
        recording = recognizer.record(source)

    try:
        transcript = recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError:
        print("Error with the speech recognition service.")
        return None

    print(f"You said: {transcript}")
    return transcript
|
38 |
+
|
39 |
+
def process_command(command):
    """Generate a reply to a recognized voice command using the AI model.

    Args:
        command: Transcribed text of the user's request. May be ``None`` or
            empty, in which case the model is not called.

    Returns:
        The model's reply with surrounding whitespace stripped; a short
        fallback message when the model returned no usable text; or ``None``
        when *command* is empty or ``None``.
    """
    if not command:
        return None

    response = model.generate_content([command])
    try:
        reply = response.text.strip()
    except ValueError:
        # The ``text`` accessor raises ValueError when the response carries
        # no text part (for example when the candidate was blocked), so
        # report that instead of crashing the request handler.
        print("AI response contained no text.")
        return "Sorry, I could not generate a response."

    print(f"AI Response: {reply}")
    return reply
|
46 |
+
|
47 |
+
# Main Code with Generative AI Setup
api_key = os.getenv("MY_API_KEY")

# os.getenv returns "" for a variable that is set but empty (e.g. a bare
# "MY_API_KEY=" line in .env), so reject any falsy value, not just None.
if not api_key:
    raise ValueError("API key not found in environment variables")

# Configure the AI model
genai.configure(api_key=api_key)

# Sampling and output settings passed to the model below.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

# Shared model instance used by process_command().
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-8b-exp-0827",
    generation_config=generation_config,
)

# Lower-case word that must appear in a transcript for assistant() to respond.
wake_word = "sema"
|
70 |
+
|
71 |
+
def assistant(audio):
    """Handle one request: transcribe the audio, check the wake word, reply.

    Args:
        audio: Value supplied by the Gradio audio input (a file path), or
            ``None`` when nothing was submitted.

    Returns:
        A ``(text, audio_path)`` tuple. ``audio_path`` is ``None`` whenever
        no spoken reply was produced.
    """
    if audio is None:
        return "No audio provided.", None

    command = recognize_speech_from_audio(audio)

    # A failed transcription (command is None) takes the same path as a
    # transcript that lacks the wake word.
    heard_wake_word = bool(command) and wake_word in command.lower()
    if not heard_wake_word:
        return "Wake word not detected.", None

    response_text = process_command(command)
    audio_response = speak_and_save(response_text)
    return response_text, audio_response
|
84 |
+
|
85 |
+
# Gradio Interface
demo = gr.Interface(
    fn=assistant,  # called with the submitted audio's file path
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(), gr.Audio(type="filepath")],  # reply text, then reply audio
    title="Sema Voice Assistant",
)
demo.launch()
|
main.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import google.generativeai as genai
|
3 |
+
import speech_recognition as sr
|
4 |
+
import pyttsx3
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
import gradio as gr
|
7 |
+
import tempfile
|
8 |
+
|
9 |
+
# Read key/value pairs from a local .env file into the process environment
# so that os.getenv() can see them later in this module.
load_dotenv()

# One pyttsx3 engine is created at import time and shared by speak_and_save().
engine = pyttsx3.init()
|
14 |
+
|
15 |
+
def speak_and_save(text):
    """Render *text* to a temporary WAV file and return that file's path.

    A uniquely named ``.wav`` file is reserved with ``delete=False`` so it
    outlives this call; the module-level pyttsx3 ``engine`` then writes the
    synthesized speech into it. Removing the file is left to the caller.
    """
    placeholder = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    audio_path = placeholder.name
    placeholder.close()

    # save_to_file only queues the job; runAndWait processes the queue.
    engine.save_to_file(text, audio_path)
    engine.runAndWait()
    return audio_path
|
22 |
+
|
23 |
+
def recognize_speech_from_audio(audio_file):
    """Transcribe the speech contained in an audio file.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile`` (the caller in
            this file passes a file path).

    Returns:
        The text returned by ``Recognizer.recognize_google``, or ``None``
        when the audio could not be understood or the recognition request
        failed.
    """
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as source:
        # Read the whole file into memory before recognition.
        recording = recognizer.record(source)

    try:
        transcript = recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError:
        print("Error with the speech recognition service.")
        return None

    print(f"You said: {transcript}")
    return transcript
|
38 |
+
|
39 |
+
def process_command(command):
    """Generate a reply to a recognized voice command using the AI model.

    Args:
        command: Transcribed text of the user's request. May be ``None`` or
            empty, in which case the model is not called.

    Returns:
        The model's reply with surrounding whitespace stripped; a short
        fallback message when the model returned no usable text; or ``None``
        when *command* is empty or ``None``.
    """
    if not command:
        return None

    response = model.generate_content([command])
    try:
        reply = response.text.strip()
    except ValueError:
        # The ``text`` accessor raises ValueError when the response carries
        # no text part (for example when the candidate was blocked), so
        # report that instead of crashing the request handler.
        print("AI response contained no text.")
        return "Sorry, I could not generate a response."

    print(f"AI Response: {reply}")
    return reply
|
46 |
+
|
47 |
+
# Main Code with Generative AI Setup
api_key = os.getenv("MY_API_KEY")

# os.getenv returns "" for a variable that is set but empty (e.g. a bare
# "MY_API_KEY=" line in .env), so reject any falsy value, not just None.
if not api_key:
    raise ValueError("API key not found in environment variables")

# Configure the AI model
genai.configure(api_key=api_key)

# Sampling and output settings passed to the model below.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

# Shared model instance used by process_command().
model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-8b-exp-0827",
    generation_config=generation_config,
)

# Lower-case word that must appear in a transcript for assistant() to respond.
wake_word = "sema"
|
70 |
+
|
71 |
+
def assistant(audio):
    """Handle one request: transcribe the audio, check the wake word, reply.

    Args:
        audio: Value supplied by the Gradio audio input (a file path), or
            ``None`` when nothing was submitted.

    Returns:
        A ``(text, audio_path)`` tuple. ``audio_path`` is ``None`` whenever
        no spoken reply was produced.
    """
    if audio is None:
        return "No audio provided.", None

    command = recognize_speech_from_audio(audio)

    # A failed transcription (command is None) takes the same path as a
    # transcript that lacks the wake word.
    heard_wake_word = bool(command) and wake_word in command.lower()
    if not heard_wake_word:
        return "Wake word not detected.", None

    response_text = process_command(command)
    audio_response = speak_and_save(response_text)
    return response_text, audio_response
|
84 |
+
|
85 |
+
# Gradio Interface
demo = gr.Interface(
    fn=assistant,  # called with the submitted audio's file path
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(), gr.Audio(type="filepath")],  # reply text, then reply audio
    title="Sema Voice Assistant",
)
demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
google-generativeai
|
2 |
+
SpeechRecognition
|
3 |
+
pyttsx3
|
4 |
+
gtts
|
5 |
+
playsound
|
6 |
+
python-dotenv
|
7 |
+
pipwin
|
8 |
+
pyaudio
|
9 |
+
gradio
|