File size: 3,990 Bytes
46bf7bb eb2635d 38df71d eb2635d 38df71d 4cba026 191f52c 38df71d 191f52c 38df71d eb2635d 191f52c 38df71d 517c9ad 38df71d eb2635d 38df71d eb2635d 38df71d eb2635d 38df71d eb2635d 38df71d 191f52c 38df71d 191f52c 38df71d 191f52c 38df71d eb2635d 38df71d eb2635d 38df71d 191f52c 38df71d 191f52c 38df71d 191f52c 38df71d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 |
import os
import io
import streamlit as st
from groq import Groq
import soundfile as sf
import google.generativeai as genai
from audiorecorder import audiorecorder
# Load API keys from environment variables.
groq_api_key = os.getenv('groqwhisper')
gemini_api_key = os.getenv('geminiapi')
if not groq_api_key or not gemini_api_key:
    st.error("Bitte setze die Umgebungsvariablen 'groqwhisper' und 'geminiapi'")
    st.stop()

# Initialize the Groq client (used below for Whisper transcription).
groq_client = Groq(api_key=groq_api_key)

# Configure Gemini.
genai.configure(api_key=gemini_api_key)

# Generation settings for the Gemini chat model.
generation_config = {
    "temperature": 0.4,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
}
model = genai.GenerativeModel(
    model_name="gemini-2.0-flash-exp",
    generation_config=generation_config,
)

# BUG FIX: removed the stray `audio_bytes = st.audio("Click to record")` line.
# st.audio() is a *playback* widget that expects audio data, not a caption
# string, so this call was broken; its result was also dead code, shadowed by
# the st.audio_input() widget created further down. It additionally ran a
# Streamlit command before st.set_page_config(), which Streamlit forbids.

# Session state: the live Gemini chat session and the rendered history.
if "chat_session" not in st.session_state:
    st.session_state.chat_session = model.start_chat(history=[])
if "display_history" not in st.session_state:
    st.session_state.display_history = []
def process_audio(audio_data):
    """Transcribe a ``(sample_rate, samples)`` tuple via Groq Whisper.

    Returns the transcript text on success, or a string prefixed with
    "Fehler:" on any failure — callers dispatch on that prefix.
    """
    try:
        rate, pcm = audio_data
        # Serialize the samples into an in-memory WAV container.
        buffer = io.BytesIO()
        sf.write(buffer, pcm, rate, format='WAV')
        wav_payload = buffer.getvalue()
        buffer.close()
        # Call Groq's Whisper endpoint; temperature 0 keeps output deterministic.
        result = groq_client.audio.transcriptions.create(
            file=("recording.wav", wav_payload, "audio/wav"),
            model="whisper-large-v3-turbo",
            prompt="transcribe",
            language="de",
            response_format="json",
            temperature=0.0,
        )
        return result.text
    except Exception as e:
        # Error channel by file convention: "Fehler:"-prefixed string.
        return f"Fehler: {str(e)}"
# --- Streamlit UI ---
# NOTE(review): st.set_page_config must be the *first* Streamlit command in a
# script; the st.error/st.stop guard near the top can still execute before
# this — consider moving this call to the very top of the file.
st.set_page_config(
    page_title="Gemini Chatbot mit Spracheingabe",
    page_icon="🤖"
)
st.title("Gemini Chatbot 🎤+📝")


def _ask_gemini(user_text):
    """Send *user_text* to the Gemini chat session and append both sides of
    the exchange to the display history (shared by voice and text input)."""
    st.session_state.display_history.append(("user", user_text))
    full_prompt = f"{user_text}\nAntworte immer auf Deutsch"
    response = st.session_state.chat_session.send_message(full_prompt)
    # Guard against an empty candidate list from the API.
    response_text = response.candidates[0].content.parts[0].text if response.candidates else "Keine Antwort"
    st.session_state.display_history.append(("assistant", response_text))


# Render the conversation so far.
for role, text in st.session_state.display_history:
    with st.chat_message(role):
        st.markdown(text)

# --- Voice input ---
audio_bytes = st.audio_input("Sprachnachricht aufnehmen")
if audio_bytes:
    audio_content = audio_bytes.getvalue()
    # BUG FIX: st.audio_input keeps returning the same recording on every
    # rerun, so without de-duplication the st.rerun() below re-transcribed
    # and re-answered the same clip in an endless loop. Remember a hash of
    # the last processed recording and skip it on subsequent reruns.
    audio_id = hash(audio_content)
    if st.session_state.get("last_audio_id") != audio_id:
        st.session_state["last_audio_id"] = audio_id
        try:
            with io.BytesIO(audio_content) as wav_io:
                samples, sample_rate = sf.read(wav_io)
            # Downmix stereo to mono before transcription.
            if len(samples.shape) > 1 and samples.shape[1] == 2:
                samples = samples.mean(axis=1)
            with st.spinner("Transkription..."):
                transcription = process_audio((sample_rate, samples))
            if transcription:
                if transcription.startswith("Fehler:"):
                    st.error(transcription)
                else:
                    _ask_gemini(transcription)
                    st.rerun()
        except Exception as e:
            st.error(f"Audioprocessing fehlgeschlagen: {str(e)}")

# --- Text input ---
# BUG FIX: the original used st.text_input(key="user_input"), whose value
# persists across reruns; combined with st.rerun() this re-submitted the same
# question in an infinite loop. st.chat_input returns a value only on the run
# where the user actually submits — the idiomatic Streamlit chat pattern.
user_input = st.chat_input("Schreibe deine Frage:")
if user_input:
    _ask_gemini(user_input)
    st.rerun()