art-manuh committed on
Commit
5f47aa4
·
verified ·
1 Parent(s): 82f09c8

Upload 4 files

Browse files
Files changed (4) hide show
  1. .env +1 -0
  2. app.py +91 -0
  3. main.py +91 -0
  4. requirements.txt +9 -0
.env ADDED
@@ -0,0 +1 @@
 
 
1
+ MY_API_KEY=<REDACTED: revoke the committed key and supply a new one through a secret store, not a tracked .env file>
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import google.generativeai as genai
3
+ import speech_recognition as sr
4
+ import pyttsx3
5
+ from dotenv import load_dotenv
6
+ import gradio as gr
7
+ import tempfile
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
+ # Initialize text-to-speech engine
13
+ engine = pyttsx3.init()
14
+
15
def speak_and_save(text):
    """Synthesize *text* into a WAV file and return the file's path.

    A named temporary file is created only to reserve a unique ``.wav``
    path. Its handle is closed before the speech engine is asked to write
    to that path, because a temporary file that is still open cannot be
    opened a second time on Windows. The file is created with
    ``delete=False``, so it is not removed automatically.

    Args:
        text: The text to synthesize.

    Returns:
        The path of the temporary ``.wav`` file handed to the engine.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        audio_path = fp.name
    # The handle is closed at this point; only the reserved name is reused.
    engine.save_to_file(text, audio_path)
    # save_to_file only queues the request; runAndWait processes the queue.
    engine.runAndWait()
    return audio_path
22
+
23
def recognize_speech_from_audio(audio_file):
    """Transcribe the speech contained in an audio file.

    Args:
        audio_file: The audio file to read, passed to ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition service reported an error.
    """
    speech_recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as audio_source:
        recording = speech_recognizer.record(audio_source)

    # Only the recognition call can raise the errors handled below.
    try:
        transcript = speech_recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError:
        print("Error with the speech recognition service.")
        return None

    print(f"You said: {transcript}")
    return transcript
38
+
39
def process_command(command):
    """Ask the generative model to answer a transcribed voice command.

    Args:
        command: The transcribed command text.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``None``
        when *command* is empty or ``None`` (the model is not called then).
    """
    # Nothing to send: skip the model call entirely.
    if not command:
        return None

    answer = model.generate_content([command]).text.strip()
    print(f"AI Response: {answer}")
    return answer
46
+
47
# Main Code with Generative AI Setup
# The key comes from the process environment, which load_dotenv() has
# already populated from the .env file.
api_key = os.getenv("MY_API_KEY")

# An empty value (e.g. "MY_API_KEY=" in .env) is as unusable as a missing
# one, so both are rejected here instead of failing on the first request.
if not api_key:
    raise ValueError("API key not found in environment variables")

# Configure the AI model
genai.configure(api_key=api_key)

# Generation settings handed to the model when it is constructed.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-8b-exp-0827",
    generation_config=generation_config,
)

# Word that must appear in a transcript before the command is processed.
wake_word = "sema"
70
+
71
def assistant(audio):
    """Handle one request from the Gradio interface.

    Args:
        audio: The recorded audio as supplied by the interface, or ``None``
            when nothing was provided.

    Returns:
        A ``(text, audio_path)`` pair. On success it holds the model's
        reply and the path of the synthesized reply audio; otherwise it
        holds a status message and ``None``.
    """
    if audio is None:
        return "No audio provided.", None

    command = recognize_speech_from_audio(audio)

    # A failed transcription and a transcript without the wake word are
    # reported to the user in the same way.
    if not command or wake_word not in command.lower():
        return "Wake word not detected.", None

    response_text = process_command(command)
    return response_text, speak_and_save(response_text)
85
# Gradio Interface
# Build the UI first, then start it as a separate step.
demo = gr.Interface(
    fn=assistant,  # Called with the recorded audio for each submission
    inputs=gr.Audio(type="filepath"),  # Audio is passed to fn as a file path
    outputs=[gr.Textbox(), gr.Audio(type="filepath")],  # Reply text and reply audio file
    title="Sema Voice Assistant",
)
demo.launch()
main.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import google.generativeai as genai
3
+ import speech_recognition as sr
4
+ import pyttsx3
5
+ from dotenv import load_dotenv
6
+ import gradio as gr
7
+ import tempfile
8
+
9
+ # Load environment variables
10
+ load_dotenv()
11
+
12
+ # Initialize text-to-speech engine
13
+ engine = pyttsx3.init()
14
+
15
def speak_and_save(text):
    """Synthesize *text* into a WAV file and return the file's path.

    A named temporary file is created only to reserve a unique ``.wav``
    path. Its handle is closed before the speech engine is asked to write
    to that path, because a temporary file that is still open cannot be
    opened a second time on Windows. The file is created with
    ``delete=False``, so it is not removed automatically.

    Args:
        text: The text to synthesize.

    Returns:
        The path of the temporary ``.wav`` file handed to the engine.
    """
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
        audio_path = fp.name
    # The handle is closed at this point; only the reserved name is reused.
    engine.save_to_file(text, audio_path)
    # save_to_file only queues the request; runAndWait processes the queue.
    engine.runAndWait()
    return audio_path
22
+
23
def recognize_speech_from_audio(audio_file):
    """Transcribe the speech contained in an audio file.

    Args:
        audio_file: The audio file to read, passed to ``sr.AudioFile``.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition service reported an error.
    """
    speech_recognizer = sr.Recognizer()
    with sr.AudioFile(audio_file) as audio_source:
        recording = speech_recognizer.record(audio_source)

    # Only the recognition call can raise the errors handled below.
    try:
        transcript = speech_recognizer.recognize_google(recording)
    except sr.UnknownValueError:
        print("Could not understand audio.")
        return None
    except sr.RequestError:
        print("Error with the speech recognition service.")
        return None

    print(f"You said: {transcript}")
    return transcript
38
+
39
def process_command(command):
    """Ask the generative model to answer a transcribed voice command.

    Args:
        command: The transcribed command text.

    Returns:
        The model's reply with surrounding whitespace stripped, or ``None``
        when *command* is empty or ``None`` (the model is not called then).
    """
    # Nothing to send: skip the model call entirely.
    if not command:
        return None

    answer = model.generate_content([command]).text.strip()
    print(f"AI Response: {answer}")
    return answer
46
+
47
# Main Code with Generative AI Setup
# The key comes from the process environment, which load_dotenv() has
# already populated from the .env file.
api_key = os.getenv("MY_API_KEY")

# An empty value (e.g. "MY_API_KEY=" in .env) is as unusable as a missing
# one, so both are rejected here instead of failing on the first request.
if not api_key:
    raise ValueError("API key not found in environment variables")

# Configure the AI model
genai.configure(api_key=api_key)

# Generation settings handed to the model when it is constructed.
generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 64,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-1.5-flash-8b-exp-0827",
    generation_config=generation_config,
)

# Word that must appear in a transcript before the command is processed.
wake_word = "sema"
70
+
71
def assistant(audio):
    """Handle one request from the Gradio interface.

    Args:
        audio: The recorded audio as supplied by the interface, or ``None``
            when nothing was provided.

    Returns:
        A ``(text, audio_path)`` pair. On success it holds the model's
        reply and the path of the synthesized reply audio; otherwise it
        holds a status message and ``None``.
    """
    if audio is None:
        return "No audio provided.", None

    command = recognize_speech_from_audio(audio)

    # A failed transcription and a transcript without the wake word are
    # reported to the user in the same way.
    if not command or wake_word not in command.lower():
        return "Wake word not detected.", None

    response_text = process_command(command)
    return response_text, speak_and_save(response_text)
85
# Gradio Interface
# Build the UI first, then start it as a separate step.
demo = gr.Interface(
    fn=assistant,  # Called with the recorded audio for each submission
    inputs=gr.Audio(type="filepath"),  # Audio is passed to fn as a file path
    outputs=[gr.Textbox(), gr.Audio(type="filepath")],  # Reply text and reply audio file
    title="Sema Voice Assistant",
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ google-generativeai
2
+ SpeechRecognition
3
+ pyttsx3
4
+ gtts
5
+ playsound
6
+ python-dotenv
7
+ pipwin
8
+ pyaudio
9
+ gradio