Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,11 @@
|
|
1 |
-
import
|
2 |
import os
|
3 |
from groq import Groq
|
4 |
import soundfile as sf
|
5 |
from tempfile import NamedTemporaryFile
|
6 |
-
import io # Für Bytes-IO hinzugefügt
|
7 |
|
8 |
# Load the API key from the environment variable
|
9 |
-
api_key = os.getenv('
|
10 |
|
11 |
if api_key is None:
|
12 |
raise ValueError("groq_whisper environment variable is not set")
|
@@ -14,6 +13,7 @@ if api_key is None:
|
|
14 |
# Initialize the Groq client
|
15 |
client = Groq(api_key=api_key)
|
16 |
|
|
|
17 |
def processaudio(audio_data):
|
18 |
try:
|
19 |
# Entpacken der Audiodaten (Sample-Rate und Numpy-Array)
|
@@ -39,27 +39,33 @@ def processaudio(audio_data):
|
|
39 |
except Exception as e:
|
40 |
return f"Ein Fehler ist aufgetreten: {str(e)}"
|
41 |
|
42 |
-
# Streamlit Interface
|
43 |
-
st.title("Audio Transkription")
|
44 |
-
sr_outputs = st.empty()
|
45 |
-
|
46 |
-
# Dateiupload und Mikrofonaufnahme als getrennte Eingaben
|
47 |
-
uploaded_file = st.file_uploader("Laden Sie eine Audiodatei hoch", type=["wav", "mp3"])
|
48 |
-
audio_bytes = st.audio_input("Oder sprechen Sie jetzt:", type="wav")
|
49 |
|
50 |
-
if uploaded_file:
|
51 |
-
# Verarbeitung hochgeladener Datei
|
52 |
-
audio_data = sf.read(uploaded_file)
|
53 |
-
transcription = processaudio(audio_data)
|
54 |
-
sr_outputs.text(transcription)
|
55 |
|
56 |
-
|
57 |
-
|
58 |
try:
|
59 |
-
#
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
except Exception as e:
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
import os
|
3 |
from groq import Groq
|
4 |
import soundfile as sf
|
5 |
from tempfile import NamedTemporaryFile
|
|
|
6 |
|
7 |
# Load the API key from the environment variable
|
8 |
+
api_key = os.getenv('groq_whisper')
|
9 |
|
10 |
if api_key is None:
|
11 |
raise ValueError("groq_whisper environment variable is not set")
|
|
|
13 |
# Initialize the Groq client
|
14 |
client = Groq(api_key=api_key)
|
15 |
|
16 |
+
|
17 |
def processaudio(audio_data):
|
18 |
try:
|
19 |
# Entpacken der Audiodaten (Sample-Rate und Numpy-Array)
|
|
|
39 |
except Exception as e:
|
40 |
return f"Ein Fehler ist aufgetreten: {str(e)}"
|
41 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
+
def process_audio(file_path):
    """Transcribe the audio file at ``file_path`` via the Groq client.

    Args:
        file_path: Path of the audio file to read and upload.

    Returns:
        The transcription text on success; otherwise a string of the form
        ``"An error occurred: <message>"`` describing the failure.
    """
    try:
        # Read the bytes first so the file handle is closed before the
        # transcription request is issued.
        with open(file_path, "rb") as file:
            audio_bytes = file.read()

        transcription = client.audio.transcriptions.create(
            # (filename, content) tuple: the basename is sent as upload name.
            file=(os.path.basename(file_path), audio_bytes),
            model="whisper-large-v3-turbo",  # Required model for transcription
            prompt="transcribe",  # Optional
            language="de",  # Optional
            response_format="json",  # Optional
            temperature=0.0,  # Optional
        )
        # NOTE: a former ``sr_inputs.clear()`` call was dropped here. On a
        # Gradio component ``.clear`` registers an event listener instead of
        # resetting the value, and any error it raised inside this ``try``
        # replaced the finished transcription with an error message.
        return transcription.text
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"An error occurred: {str(e)}"
|
63 |
+
|
64 |
+
# Speech-to-text UI: a textbox for the result and a microphone input whose
# value is handed to `processaudio`.
with gr.Blocks() as speech:
    with gr.Row():
        sr_outputs = gr.Textbox(label="Transkription")
    with gr.Row():
        sr_inputs = gr.Microphone(type="numpy")
    # Run the handler whenever the microphone component's value changes and
    # write its return value into the textbox.
    sr_inputs.change(fn=processaudio, inputs=sr_inputs, outputs=sr_outputs)

speech.launch()
|