mgokg committed on
Commit
891e168
·
verified ·
1 Parent(s): 8147059

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -23
app.py CHANGED
@@ -1,12 +1,11 @@
1
- import streamlit as st
2
  import os
3
  from groq import Groq
4
  import soundfile as sf
5
  from tempfile import NamedTemporaryFile
6
- import io # Für Bytes-IO hinzugefügt
7
 
8
  # Load the API key from the environment variable
9
- api_key = os.getenv('groqwhisper')
10
 
11
  if api_key is None:
12
  raise ValueError("groq_whisper environment variable is not set")
@@ -14,6 +13,7 @@ if api_key is None:
14
  # Initialize the Groq client
15
  client = Groq(api_key=api_key)
16
 
 
17
  def processaudio(audio_data):
18
  try:
19
  # Entpacken der Audiodaten (Sample-Rate und Numpy-Array)
@@ -39,27 +39,33 @@ def processaudio(audio_data):
39
  except Exception as e:
40
  return f"Ein Fehler ist aufgetreten: {str(e)}"
41
 
42
- # Streamlit Interface
43
- st.title("Audio Transkription")
44
- sr_outputs = st.empty()
45
-
46
- # Dateiupload und Mikrofonaufnahme als getrennte Eingaben
47
- uploaded_file = st.file_uploader("Laden Sie eine Audiodatei hoch", type=["wav", "mp3"])
48
- audio_bytes = st.audio_input("Oder sprechen Sie jetzt:", type="wav")
49
 
50
- if uploaded_file:
51
- # Verarbeitung hochgeladener Datei
52
- audio_data = sf.read(uploaded_file)
53
- transcription = processaudio(audio_data)
54
- sr_outputs.text(transcription)
55
 
56
- elif audio_bytes:
57
- # Verarbeitung Mikrofonaufnahme
58
  try:
59
- # Konvertiere Bytes in Audio-Daten
60
- audio_io = io.BytesIO(audio_bytes)
61
- audio_data = sf.read(audio_io)
62
- transcription = processaudio(audio_data)
63
- sr_outputs.text(transcription)
 
 
 
 
 
 
 
 
 
64
  except Exception as e:
65
- sr_outputs.text(f"Fehler bei der Aufnahmeverarbeitung: {str(e)}")
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
  import os
3
  from groq import Groq
4
  import soundfile as sf
5
  from tempfile import NamedTemporaryFile
 
6
 
7
# Read the Groq API key from the environment and fail fast when it is missing.
api_key = os.environ.get("groq_whisper")

if api_key is None:
    raise ValueError("groq_whisper environment variable is not set")

# Module-level Groq client, created once with the key loaded above.
client = Groq(api_key=api_key)
15
 
16
+
17
  def processaudio(audio_data):
18
  try:
19
  # Entpacken der Audiodaten (Sample-Rate und Numpy-Array)
 
39
  except Exception as e:
40
  return f"Ein Fehler ist aufgetreten: {str(e)}"
41
 
 
 
 
 
 
 
 
42
 
 
 
 
 
 
43
 
44
def process_audio(file_path):
    """Transcribe the audio file at ``file_path`` via the Groq transcription API.

    Args:
        file_path: Path to an audio file on disk. ``None`` or an empty string
            (nothing to transcribe) produces an empty result.

    Returns:
        The transcription text on success, ``""`` when no file path was
        given, or a message starting with ``"An error occurred: "`` when
        reading the file or the API request fails.
    """
    # Nothing to transcribe: return an empty result instead of surfacing the
    # TypeError that open(None) would raise.
    if not file_path:
        return ""

    try:
        # Read the whole file up front so the handle is closed before the
        # network request is made.
        with open(file_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        transcription = client.audio.transcriptions.create(
            # The file is passed as a (filename, content) tuple.
            file=(os.path.basename(file_path), audio_bytes),
            model="whisper-large-v3-turbo",  # Required model to use for transcription
            prompt="transcribe",  # Optional
            language="de",  # Optional
            response_format="json",  # Optional
            temperature=0.0,  # Optional
        )
        # NOTE: the former ``sr_inputs.clear()`` call was removed. ``clear``
        # registers an event listener on the component; it does not reset the
        # recording, and an exception from it would have replaced a
        # successful transcription with the error message below.
        return transcription.text
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"An error occurred: {str(e)}"
63
+
64
# Build the UI: a transcription text box above a microphone input.
with gr.Blocks() as speech:
    with gr.Row():
        sr_outputs = gr.Textbox(label="Transkription")
    with gr.Row():
        sr_inputs = gr.Microphone(type="numpy")
    # Whenever the microphone value changes, pass it to processaudio and
    # show the returned text in the text box.
    sr_inputs.change(
        fn=processaudio,
        inputs=sr_inputs,
        outputs=sr_outputs,
    )

# Start the Gradio app.
speech.launch()