# PandasAI / app.py
# Hugging Face Space by mgokg — commit eb2635d ("Update app.py", verified), 3.54 kB.
import streamlit as st
import os
import google.generativeai as genai
import io
import soundfile as sf
from groq import Groq
# Configure the Streamlit page.
st.set_page_config(
    page_title="Gemini-2 Chatbot mit Spracheingabe",
    page_icon="🤖"
)

# Load the Groq API key from the environment.
api_key = os.getenv('groqwhisper')
if api_key is None:
    # Bug fix: the message previously said 'groq_whisper', but the variable
    # actually read above is 'groqwhisper' — name the real variable.
    st.error("The 'groqwhisper' environment variable is not set. Please set it and restart the app.")
    st.stop()

# Initialize the Groq client used for Whisper transcription.
client = Groq(api_key=api_key)
# Funktion zur Verarbeitung von Audio
def process_audio(audio_data):
    """Transcribe a recording with Groq's Whisper endpoint.

    Parameters
    ----------
    audio_data : tuple
        ``(sample_rate, samples)`` pair as produced by ``soundfile``.

    Returns
    -------
    str
        The transcription text, or a human-readable error description
        when the request fails.
    """
    try:
        sample_rate, samples = audio_data

        # Serialize the samples into an in-memory WAV container.
        with io.BytesIO() as buffer:
            sf.write(buffer, samples, sample_rate, format='WAV')
            buffer.seek(0)
            payload = buffer.read()

        # Send the WAV payload to Groq for German transcription.
        result = client.audio.transcriptions.create(
            file=("recording.wav", payload, "audio/wav"),
            model="whisper-large-v3-turbo",
            prompt="transcribe",
            language="de",
            response_format="json",
            temperature=0.0
        )
        return result.text
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Configure the Gemini API key, mirroring the friendly Groq key handling
# above instead of letting os.environ[...] raise a bare KeyError.
gemini_key = os.getenv("geminiapi")
if gemini_key is None:
    st.error("The 'geminiapi' environment variable is not set. Please set it and restart the app.")
    st.stop()
genai.configure(api_key=gemini_key)

# Generation parameters for the Gemini model.
generation_config = {
    "temperature": 0.4,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}

model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
)
# Keep one Gemini chat session alive across Streamlit reruns.
if "chat_session" not in st.session_state:
    st.session_state.chat_session = model.start_chat(history=[])

# UI components
st.title("Gemini-2 Chatbot mit Spracheingabe")

# Microphone recorder; yields an UploadedFile-like object (or None).
audio_bytes = st.audio_input("Klicke zum Aufnehmen", key="audio_input")

# Text entry as an alternative to voice input.
user_input = st.text_input("Oder schreibe deine Frage:", key="text_input")
if audio_bytes:
    # st.audio_input returns an UploadedFile; extract the raw WAV bytes.
    audio_bytes_content = audio_bytes.getvalue()

    # Decode the WAV bytes into a numpy array with soundfile.
    with io.BytesIO(audio_bytes_content) as wav_io:
        samples, sample_rate = sf.read(wav_io)

    # Downmix any multichannel recording to mono by averaging channels.
    # (Generalized: the previous check only handled exactly two channels,
    # leaving e.g. 4- or 6-channel input untouched.)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    # Transcribe the recording.
    with st.spinner("Transcribing..."):
        transcription = process_audio((sample_rate, samples))

    # Feed the transcription to the chatbot as user input.
    if transcription:
        user_input = transcription
if user_input:
    chat_area = st.container()

    # Append the standing instruction so answers are always in German.
    full_prompt = f"{user_input}\nAntworte immer auf Deutsch"

    # Ask Gemini within the persistent chat session.
    response = st.session_state.chat_session.send_message(full_prompt)

    # Extract the text of the first candidate; fall back to a
    # placeholder when the model returned no candidates at all.
    if not response.candidates:
        response_text = "Keine Antwort erhalten"
    else:
        response_text = response.candidates[0].content.parts[0].text

    # Render the exchange in the chat container.
    chat_area.chat_message("user").write(f"{user_input}")
    chat_area.chat_message("assistant").write(f"{response_text}")