# Spaces:
# Running
# Running
import gradio as gr

from logging_config import log_buffer
from transcription_tool import TranscriptTool  # expected to be defined in `transcription_tool.py`

# Module-level instance of the smolagent transcription tool.
transcript_tool = TranscriptTool()
def transcribe_and_stream_logs(file):
    """Transcribe an uploaded media file and return the text with the logs.

    Args:
        file: Value delivered by the upload component. Either an object
            exposing the on-disk path as ``.name`` or the path value itself;
            ``None`` when nothing has been uploaded.

    Returns:
        A ``(transcription, logs)`` pair: whatever ``transcript_tool.forward``
        returns for the file, followed by the full contents of
        ``log_buffer``. When ``file`` is ``None`` the transcription is an
        empty string and the second element is a short notice instead of
        the log contents.
    """
    # Gradio hands the callback None when the File component is empty;
    # report that instead of failing on ``None.name``.
    if file is None:
        return "", "No file uploaded. Please upload an audio or video file first."

    # Accept both wrappers that carry the path in ``.name`` and plain paths.
    media_path = getattr(file, "name", file)

    # Perform transcription
    transcription_result = transcript_tool.forward(media_path)

    # Rewind first so the read returns everything held in the buffer rather
    # than only what follows the current position.
    log_buffer.seek(0)
    logs = log_buffer.read()
    return transcription_result, logs
# Assemble the page: heading, description, upload widget, trigger button,
# and one text box each for the transcription and the logs.
with gr.Blocks() as app:
    gr.Markdown("# TranscriptTool: Transcribe Audio/Video")
    gr.Markdown(
        "TranscriptTool is a smolagent tool used to transcribe audio and video "
        "files into text. Leveraging OpenAI's Whisper and `ffmpeg`, this tool "
        "empowers agents to process multimedia inputs efficiently. It supports "
        "robust file handling, dynamic device selection (CPU or GPU), and easy "
        "use within smolagents via the Hugging Face API."
    )

    file_input = gr.File(
        label="Upload Audio/Video File",
        file_types=["audio", "video"],
    )
    transcribe_button = gr.Button("Transcribe")
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    log_output = gr.Textbox(label="Logs", lines=15)

    # The callback returns two values; they fill the two output boxes in
    # the order listed here.
    transcribe_button.click(
        fn=transcribe_and_stream_logs,
        inputs=file_input,
        outputs=[transcription_output, log_output],
    )

# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()