TranscriptTool / app.py
maguid28's picture
Implemented smolagent tool
13d3de7
import gradio as gr
from logging_config import log_buffer
from transcription_tool import TranscriptTool # Assuming TranscriptTool is in `transcription_tool.py`
# Shared smolagent transcription tool: instantiated once when this module is
# imported and reused by every call to transcribe_and_stream_logs below.
transcript_tool = TranscriptTool()
def transcribe_and_stream_logs(file):
    """Transcribe an uploaded media file and return the text plus captured logs.

    Args:
        file: The value Gradio supplies for the ``gr.File`` input. Depending on
            the Gradio version/configuration this is either a filesystem path
            string or a temp-file-like object exposing the path as ``.name``.
            It is ``None`` when the button is clicked without an upload.

    Returns:
        tuple: ``(transcription, logs)``. ``transcription`` is whatever
        ``transcript_tool.forward`` returns for the file; ``logs`` is the full
        current content of ``log_buffer``. When no file was supplied, a short
        explanatory message and an empty log string are returned instead.
    """
    # Guard against "Transcribe" being clicked with nothing uploaded; without
    # this, the attribute access below raises AttributeError on None.
    if file is None:
        return "No file uploaded. Please upload an audio or video file first.", ""

    # Accept both shapes gr.File can hand over: a plain path string, or a
    # temp-file wrapper whose path lives in `.name`.
    temp_file_path = file if isinstance(file, str) else file.name

    # Perform transcription
    transcription_result = transcript_tool.forward(temp_file_path)

    # Rewind before reading so the whole buffer is returned, not just whatever
    # follows the current position.
    # NOTE(review): presumably log_buffer is an in-memory text buffer fed by
    # the logging configuration — confirm in logging_config.
    log_buffer.seek(0)
    logs = log_buffer.read()
    return transcription_result, logs
# Gradio front end: one file upload, a trigger button, and two text panes
# (transcription and logs) filled by transcribe_and_stream_logs.
with gr.Blocks() as app:
    # Page heading followed by a short description of the tool.
    gr.Markdown("# TranscriptTool: Transcribe Audio/Video")
    gr.Markdown(
        "TranscriptTool is a smolagent tool used to transcribe audio and video "
        "files into text. Leveraging OpenAI's Whisper and `ffmpeg`, this tool "
        "empowers agents to process multimedia inputs efficiently. It supports "
        "robust file handling, dynamic device selection (CPU or GPU), and easy "
        "use within smolagents via the Hugging Face API."
    )

    # Input side: the media file and the button that starts the run.
    file_input = gr.File(
        label="Upload Audio/Video File",
        file_types=["audio", "video"],
    )
    transcribe_button = gr.Button("Transcribe")

    # Output side: transcription text and the captured log text.
    transcription_output = gr.Textbox(label="Transcription", lines=10)
    log_output = gr.Textbox(label="Logs", lines=15)

    # The handler returns a (transcription, logs) pair, mapped in order onto
    # the two output textboxes.
    transcribe_button.click(
        transcribe_and_stream_logs,
        inputs=file_input,
        outputs=[transcription_output, log_output],
    )

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    app.launch()