transcribeaudio / app.py
mgokg's picture
Update app.py
1f68263 verified
raw
history blame
2.36 kB
import gradio as gr
import os
from groq import Groq
# Read the Groq API key from the `groq_whisper` environment variable.
api_key = os.getenv('groq_whisper')
# Fail fast at startup. An empty value is rejected as well as a missing one:
# presumably it could otherwise only surface as an authentication failure on
# the first transcription request (TODO confirm against the Groq SDK).
if not api_key:
    raise ValueError("groq_whisper environment variable is not set")

# Module-level Groq client shared by the transcription functions.
client = Groq(api_key=api_key)
def processaudio(audio_data):
    """Transcribe in-memory audio samples to German text via Groq Whisper.

    The samples are encoded as a 16-bit PCM WAV file entirely in memory with
    the standard-library ``wave`` module, and the resulting bytes are sent to
    the transcription endpoint. No temporary file is created.

    Args:
        audio_data: A ``(sample_rate, samples)`` pair. ``samples`` must be
            convertible to a NumPy array of shape ``(frames,)`` or
            ``(frames, channels)`` with dtype int16 or a floating dtype
            (floats are assumed to lie in [-1.0, 1.0] and are clipped to
            that range) -- presumably what a Gradio audio input with
            ``type="numpy"`` delivers; verify against the caller.

    Returns:
        The transcribed text, or a string starting with
        ``"Ein Fehler ist aufgetreten: "`` if anything fails. Exceptions are
        never propagated to the caller.
    """
    # Local imports: the module's import block does not provide these names.
    import io
    import wave

    import numpy as np

    try:
        # Unpack the audio data (sample rate and sample array).
        sample_rate, samples = audio_data

        pcm = np.asarray(samples)
        if pcm.ndim not in (1, 2):
            raise ValueError(f"expected 1-D or 2-D samples, got {pcm.ndim}-D")
        if np.issubdtype(pcm.dtype, np.floating):
            # Scale normalized floats to the signed 16-bit range.
            pcm = np.round(np.clip(pcm, -1.0, 1.0) * 32767.0)
        elif pcm.dtype != np.int16:
            raise TypeError(f"unsupported sample dtype: {pcm.dtype}")
        # WAV stores little-endian samples; a C-ordered (frames, channels)
        # array serializes as interleaved frames, which is what wave expects.
        pcm = np.ascontiguousarray(pcm, dtype="<i2")
        channels = 1 if pcm.ndim == 1 else pcm.shape[1]

        # Encode the samples as a WAV file in memory.
        buffer = io.BytesIO()
        with wave.open(buffer, "wb") as wav_file:
            wav_file.setnchannels(channels)
            wav_file.setsampwidth(2)  # 2 bytes per sample = 16-bit PCM
            wav_file.setframerate(int(sample_rate))
            wav_file.writeframes(pcm.tobytes())

        # Send the encoded bytes to Groq as a (filename, content) pair.
        transcription = client.audio.transcriptions.create(
            file=("audio.wav", buffer.getvalue()),
            model="whisper-large-v3-turbo",
            prompt="transcribe",
            language="de",
            response_format="json",
            temperature=0.0,
        )
        return transcription.text
    except Exception as e:
        # Handler boundary: report the failure as text instead of raising.
        return f"Ein Fehler ist aufgetreten: {str(e)}"
def process_audio(file_path):
    """Transcribe a recorded audio file to German text via Groq Whisper.

    This function only produces the transcription text; it does not touch
    any UI component. (Calling ``clear()`` on a Gradio component registers
    an event listener rather than resetting its value, so resetting the
    microphone from here is not possible.)

    Args:
        file_path: Path of the audio file to transcribe. May be ``None`` or
            empty -- presumably what the UI passes when the recording has
            been cleared; verify against the caller -- in which case no
            request is made.

    Returns:
        The transcribed text; an empty string when no file was supplied; or
        a string starting with ``"An error occurred: "`` if reading the file
        or the transcription request fails.
    """
    # Nothing recorded: skip the API call instead of reporting an open() error.
    if not file_path:
        return ""

    try:
        # Read the whole recording; the API takes a (filename, bytes) pair.
        with open(file_path, "rb") as file:
            audio_bytes = file.read()

        # Create a transcription of the audio file.
        transcription = client.audio.transcriptions.create(
            file=(os.path.basename(file_path), audio_bytes),  # filename + content
            model="whisper-large-v3-turbo",  # Required model to use for transcription
            prompt="transcribe",  # Optional
            language="de",  # Optional
            response_format="json",  # Optional
            temperature=0.0,  # Optional
        )
        return transcription.text
    except Exception as e:
        # Handler boundary: show the failure in the output instead of raising.
        return f"An error occurred: {str(e)}"
# Build the UI: a transcription text box above a microphone input.
with gr.Blocks() as speech:
    with gr.Row():
        sr_outputs = gr.Textbox(label="Transkription")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath")

    # Transcribe whenever the microphone value changes and show the result.
    sr_inputs.change(
        fn=process_audio,
        inputs=[sr_inputs],
        outputs=[sr_outputs],
    )

# Start the Gradio server.
speech.launch()