mgokg committed on
Commit
38df71d
·
verified ·
1 Parent(s): 3a0f8ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -80
app.py CHANGED
@@ -1,38 +1,57 @@
1
- import streamlit as st
2
  import os
3
- import google.generativeai as genai
4
  import io
5
- import soundfile as sf
6
  from groq import Groq
 
 
7
 
8
- # Streamlit Seite konfigurieren
9
- st.set_page_config(
10
- page_title="Gemini-2 Chatbot mit Spracheingabe",
11
- page_icon="🤖"
12
- )
13
 
14
- # Load environment variables for Groq
15
- api_key = os.getenv('groqwhisper')
16
- if api_key is None:
17
- st.error("The 'groq_whisper' environment variable is not set. Please set it and restart the app.")
18
  st.stop()
19
 
20
- # Initialize Groq client
21
- client = Groq(api_key=api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- # Funktion zur Verarbeitung von Audio
24
  def process_audio(audio_data):
25
- """Process audio data and return transcription."""
26
  try:
27
  sample_rate, samples = audio_data
28
 
29
- # Create in-memory WAV file
30
  with io.BytesIO() as wav_buffer:
31
  sf.write(wav_buffer, samples, sample_rate, format='WAV')
32
  wav_buffer.seek(0)
33
 
34
- # Send to Groq for transcription
35
- transcription = client.audio.transcriptions.create(
36
  file=("recording.wav", wav_buffer.read(), "audio/wav"),
37
  model="whisper-large-v3-turbo",
38
  prompt="transcribe",
@@ -42,73 +61,52 @@ def process_audio(audio_data):
42
  )
43
  return transcription.text
44
  except Exception as e:
45
- return f"An error occurred: {str(e)}"
46
-
47
- # Gemini API Konfiguration
48
- genai.configure(api_key=os.environ["geminiapi"])
49
 
50
- # Modell-Konfiguration
51
- generation_config = {
52
- "temperature": 0.4,
53
- "top_p": 0.95,
54
- "top_k": 40,
55
- "max_output_tokens": 8192,
56
- "response_mime_type": "text/plain",
57
- }
58
-
59
- model = genai.GenerativeModel(
60
- model_name="gemini-2.0-flash-exp",
61
- generation_config=generation_config,
62
- #grounding_config=grounding_config
63
  )
 
64
 
65
- # Chat Session State initialisieren
66
- if "chat_session" not in st.session_state:
67
- st.session_state.chat_session = model.start_chat(history=[])
68
-
69
- # UI Komponenten
70
- st.title("Gemini-2 Chatbot mit Spracheingabe")
71
-
72
- # Audio recorder component
73
- audio_bytes = st.audio_input("Klicke zum Aufnehmen", key="audio_input")
74
-
75
- # Chat Input
76
- user_input = st.text_input("Oder schreibe deine Frage:", key="text_input")
77
 
 
 
78
  if audio_bytes:
79
- # Extrahiere die Bytes aus dem UploadedFile-Objekt
80
- audio_bytes_content = audio_bytes.getvalue()
81
-
82
- # Konvertiere die Bytes in ein numpy-Array mit soundfile
83
- with io.BytesIO(audio_bytes_content) as wav_io:
84
- samples, sample_rate = sf.read(wav_io)
85
-
86
- # Konvertiere Stereo in Mono, falls erforderlich
87
- if len(samples.shape) > 1 and samples.shape[1] == 2:
88
- samples = samples.mean(axis=1)
89
-
90
- # Verarbeite das Audio
91
- with st.spinner("Transcribing..."):
92
- transcription = process_audio((sample_rate, samples))
93
-
94
- # Verarbeitet die Transkription als Eingabe für den Chatbot
95
- if transcription:
96
- user_input = transcription
 
 
 
 
97
 
 
 
98
  if user_input:
99
- messages = st.container()
100
- # Prompt mit Sprachaufforderung kombinieren
101
  full_prompt = f"{user_input}\nAntworte immer auf Deutsch"
102
-
103
- # Antwort generieren
104
  response = st.session_state.chat_session.send_message(full_prompt)
105
-
106
- # Antwort extrahieren
107
- if response.candidates:
108
- response_text = response.candidates[0].content.parts[0].text
109
- else:
110
- response_text = "Keine Antwort erhalten"
111
-
112
- # Antwort anzeigen
113
- messages.chat_message("user").write(f"{user_input}")
114
- messages.chat_message("assistant").write(f"{response_text}")
 
 
1
import os
import io
import streamlit as st
from groq import Groq
import soundfile as sf
import google.generativeai as genai

# Load the required API keys from environment variables.
groq_api_key = os.getenv('groqwhisper')
gemini_api_key = os.getenv('geminiapi')

# Abort the app early if either key is missing.
# NOTE(review): st.error() is a Streamlit command; Streamlit requires
# st.set_page_config() to be the *first* Streamlit call in the script, but
# that call happens further down in this file — on the missing-key path this
# ordering likely raises StreamlitAPIException. Confirm and consider moving
# set_page_config above this guard.
if not groq_api_key or not gemini_api_key:
    st.error("Bitte setze die Umgebungsvariablen 'groqwhisper' und 'geminiapi'")
    st.stop()

# Initialize the Groq client (used below for Whisper transcription).
groq_client = Groq(api_key=groq_api_key)

# Configure the Gemini SDK with its API key.
genai.configure(api_key=gemini_api_key)

# Generation settings passed to the Gemini model.
generation_config = {
    "temperature": 0.4,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
)

# Persist the Gemini chat session across Streamlit reruns.
if "chat_session" not in st.session_state:
    st.session_state.chat_session = model.start_chat(history=[])

# display_history holds (role, text) tuples rendered by the UI section below.
if "display_history" not in st.session_state:
    st.session_state.display_history = []
42
 
 
43
  def process_audio(audio_data):
44
+ """Verarbeitet Audiodaten und gibt Transkript zurück."""
45
  try:
46
  sample_rate, samples = audio_data
47
 
48
+ # WAV-Datei im Speicher erstellen
49
  with io.BytesIO() as wav_buffer:
50
  sf.write(wav_buffer, samples, sample_rate, format='WAV')
51
  wav_buffer.seek(0)
52
 
53
+ # Transkription mit Groq
54
+ transcription = groq_client.audio.transcriptions.create(
55
  file=("recording.wav", wav_buffer.read(), "audio/wav"),
56
  model="whisper-large-v3-turbo",
57
  prompt="transcribe",
 
61
  )
62
  return transcription.text
63
  except Exception as e:
64
+ return f"Fehler: {str(e)}"
 
 
 
65
 
66
# --- Streamlit UI ---
# NOTE(review): st.set_page_config must be the first Streamlit command in the
# script; the st.error/st.stop guard at the top of the file runs before this
# and would raise StreamlitAPIException when an API key is missing — confirm
# and move this call to the top of the file.
st.set_page_config(
    page_title="Gemini Chatbot mit Spracheingabe",
    page_icon="🤖"
)
st.title("Gemini Chatbot 🎤+📝")

# Render the accumulated conversation ((role, text) tuples).
for role, text in st.session_state.display_history:
    with st.chat_message(role):
        st.markdown(text)


def _ask_gemini(prompt_text):
    """Send one user turn to the Gemini chat session and refresh the UI.

    Appends the user message and the model's reply to display_history,
    then reruns the script so the history loop above renders them.
    """
    st.session_state.display_history.append(("user", prompt_text))
    full_prompt = f"{prompt_text}\nAntworte immer auf Deutsch"
    response = st.session_state.chat_session.send_message(full_prompt)
    # Fall back to a placeholder when the model returns no candidates.
    response_text = response.candidates[0].content.parts[0].text if response.candidates else "Keine Antwort"
    st.session_state.display_history.append(("assistant", response_text))
    st.rerun()


# --- Voice input: transcribe the recording, then chat with it ---
audio_bytes = st.audio_input("Sprachnachricht aufnehmen")
if audio_bytes:
    audio_content = audio_bytes.getvalue()
    # Guard against rerun loops: st.audio_input keeps returning the same
    # recording on every rerun, so without this check the same clip would be
    # transcribed and answered again after each st.rerun().
    audio_id = hash(audio_content)
    if st.session_state.get("_last_audio_id") != audio_id:
        st.session_state["_last_audio_id"] = audio_id
        transcription = None
        # Keep the try narrow: st.rerun() (inside _ask_gemini) works by raising
        # an internal exception, which a broad `except Exception` here would
        # swallow — so the chat call happens *after* this block.
        try:
            # Decode the uploaded WAV into a sample array.
            with io.BytesIO(audio_content) as wav_io:
                samples, sample_rate = sf.read(wav_io)
            # Downmix stereo to mono before sending to Whisper.
            if len(samples.shape) > 1 and samples.shape[1] == 2:
                samples = samples.mean(axis=1)
            with st.spinner("Transkription..."):
                transcription = process_audio((sample_rate, samples))
        except Exception as e:
            st.error(f"Audioprocessing fehlgeschlagen: {str(e)}")
        if transcription:
            # process_audio reports failures as a "Fehler:"-prefixed string.
            if transcription.startswith("Fehler:"):
                st.error(transcription)
            else:
                _ask_gemini(transcription)

# --- Text input ---
user_input = st.text_input("Schreibe deine Frage:", key="user_input")
if user_input:
    # Same rerun guard: the keyed text_input value persists across reruns, so
    # only forward it to the chatbot when it actually changed.
    if st.session_state.get("_last_text_input") != user_input:
        st.session_state["_last_text_input"] = user_input
        _ask_gemini(user_input)