Spaces:

art-manuh
/

sema_voice_assistant

Sleeping

App Files Files Community

art-manuh committed on Sep 12, 2024

Commit

5f47aa4

verified ·

1 Parent(s): 82f09c8

Upload 4 files

Browse files

Files changed (4) hide show

.env +1 -0
app.py +91 -0
main.py +91 -0
requirements.txt +9 -0

.env ADDED Viewed

	@@ -0,0 +1 @@


1	+ MY_API_KEY=<REDACTED — a real key was committed here; rotate it and supply it as a Space secret instead>

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+import google.generativeai as genai
+import speech_recognition as sr
+import pyttsx3
+from dotenv import load_dotenv
+import gradio as gr
+import tempfile
+# Load environment variables from a local .env file (if one exists) so that
+# the os.getenv("MY_API_KEY") lookup further down can find the key.
+load_dotenv()
+# Initialize the text-to-speech engine once at import time; speak_and_save()
+# reuses this single module-level instance on every call.
+engine = pyttsx3.init()
+def speak_and_save(text):
+    """Use text-to-speech to speak the given text and save it as an audio file."""
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
+        audio_path = fp.name
+    engine.save_to_file(text, audio_path)
+    engine.runAndWait()
+    return audio_path
+def recognize_speech_from_audio(audio_file):
+    """Capture and recognize speech from the audio file."""
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_file) as source:
+        audio = recognizer.record(source)
+    try:
+        command = recognizer.recognize_google(audio)
+        print(f"You said: {command}")
+        return command
+    except sr.UnknownValueError:
+        print("Could not understand audio.")
+        return None
+    except sr.RequestError:
+        print("Error with the speech recognition service.")
+        return None
+def process_command(command):
+    """Generate a response based on the voice command using the AI model."""
+    if command:
+        response = model.generate_content([command])
+        reply = response.text.strip()
+        print(f"AI Response: {reply}")
+        return reply
+# Main Code with Generative AI Setup
+api_key = os.getenv("MY_API_KEY")
+if api_key is None:
+    raise ValueError("API key not found in environment variables")
+# Configure the AI model
+genai.configure(api_key=api_key)
+generation_config = {
+    "temperature": 1,
+    "top_p": 0.95,
+    "top_k": 64,
+    "max_output_tokens": 8192,
+    "response_mime_type": "text/plain",
+}
+model = genai.GenerativeModel(
+    model_name="gemini-1.5-flash-8b-exp-0827",
+    generation_config=generation_config,
+)
+wake_word = "sema"
+def assistant(audio):
+    # Open the audio file instead of writing it directly
+    if audio is None:
+        return "No audio provided.", None
+    command = recognize_speech_from_audio(audio)
+    if command and wake_word in command.lower():
+        response_text = process_command(command)
+        audio_response = speak_and_save(response_text)
+        return response_text, audio_response
+    else:
+        return "Wake word not detected.", None
+# Gradio Interface
+gr.Interface(
+    fn=assistant,  # Function to call when the interface is run
+    inputs=gr.Audio(type="filepath"),  # Audio input, expecting a file path from the microphone
+    outputs=[gr.Textbox(), gr.Audio(type="filepath")],  # Outputs text and the response audio
+    title="Sema Voice Assistant"
+).launch()

main.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import os
+import google.generativeai as genai
+import speech_recognition as sr
+import pyttsx3
+from dotenv import load_dotenv
+import gradio as gr
+import tempfile
+# Load environment variables from a local .env file (if one exists) so that
+# the os.getenv("MY_API_KEY") lookup further down can find the key.
+load_dotenv()
+# Initialize the text-to-speech engine once at import time; speak_and_save()
+# reuses this single module-level instance on every call.
+engine = pyttsx3.init()
+def speak_and_save(text):
+    """Use text-to-speech to speak the given text and save it as an audio file."""
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
+        audio_path = fp.name
+    engine.save_to_file(text, audio_path)
+    engine.runAndWait()
+    return audio_path
+def recognize_speech_from_audio(audio_file):
+    """Capture and recognize speech from the audio file."""
+    recognizer = sr.Recognizer()
+    with sr.AudioFile(audio_file) as source:
+        audio = recognizer.record(source)
+    try:
+        command = recognizer.recognize_google(audio)
+        print(f"You said: {command}")
+        return command
+    except sr.UnknownValueError:
+        print("Could not understand audio.")
+        return None
+    except sr.RequestError:
+        print("Error with the speech recognition service.")
+        return None
+def process_command(command):
+    """Generate a response based on the voice command using the AI model."""
+    if command:
+        response = model.generate_content([command])
+        reply = response.text.strip()
+        print(f"AI Response: {reply}")
+        return reply
+# Main Code with Generative AI Setup
+api_key = os.getenv("MY_API_KEY")
+if api_key is None:
+    raise ValueError("API key not found in environment variables")
+# Configure the AI model
+genai.configure(api_key=api_key)
+generation_config = {
+    "temperature": 1,
+    "top_p": 0.95,
+    "top_k": 64,
+    "max_output_tokens": 8192,
+    "response_mime_type": "text/plain",
+}
+model = genai.GenerativeModel(
+    model_name="gemini-1.5-flash-8b-exp-0827",
+    generation_config=generation_config,
+)
+wake_word = "sema"
+def assistant(audio):
+    # Open the audio file instead of writing it directly
+    if audio is None:
+        return "No audio provided.", None
+    command = recognize_speech_from_audio(audio)
+    if command and wake_word in command.lower():
+        response_text = process_command(command)
+        audio_response = speak_and_save(response_text)
+        return response_text, audio_response
+    else:
+        return "Wake word not detected.", None
+# Gradio Interface
+gr.Interface(
+    fn=assistant,  # Function to call when the interface is run
+    inputs=gr.Audio(type="filepath"),  # Audio input, expecting a file path from the microphone
+    outputs=[gr.Textbox(), gr.Audio(type="filepath")],  # Outputs text and the response audio
+    title="Sema Voice Assistant"
+).launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+google-generativeai
+SpeechRecognition
+pyttsx3
+gtts
+playsound
+python-dotenv
+pipwin
+pyaudio
+gradio