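# Gradio app for a Hugging Face Space: transcribe audio with OpenAI Whisper
# and export the result as TXT, SRT, and JSON.
# Assumed requirements.txt: gradio, openai-whisper, torch (the `spaces`
# package is typically preinstalled on Spaces hardware).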
import gradio as gr
import whisper
import torch
import json
import spaces
from datetime import timedelta
import os
import zipfile
from pathlib import Path


def format_timestamp(seconds):
    """Convert a time in seconds to the SRT timestamp format HH:MM:SS,mmm."""
    td = timedelta(seconds=seconds)
    # Use total_seconds() rather than td.seconds so durations of a day or
    # more don't silently wrap around.
    total_seconds = int(td.total_seconds())
    hours = total_seconds // 3600
    minutes = (total_seconds // 60) % 60
    secs = total_seconds % 60
    milliseconds = td.microseconds // 1000
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"


def save_files(text, srt, json_data, base_name):
    """Save the transcription in each format and bundle them into a ZIP."""
    # Create the output directory if it doesn't exist
    output_dir = Path("transcriptions")
    output_dir.mkdir(exist_ok=True)

    # Derive the output filenames from the uploaded file's name
    base_name = Path(base_name).stem
    txt_path = output_dir / f"{base_name}.txt"
    srt_path = output_dir / f"{base_name}.srt"
    json_path = output_dir / f"{base_name}.json"
    zip_path = output_dir / f"{base_name}_all.zip"

    # Save the individual files (UTF-8 so non-ASCII transcripts survive
    # regardless of the platform's default encoding)
    txt_path.write_text(text, encoding="utf-8")
    srt_path.write_text(srt, encoding="utf-8")
    json_path.write_text(json_data, encoding="utf-8")

    # Bundle everything into a single ZIP file
    with zipfile.ZipFile(zip_path, "w") as zipf:
        zipf.write(txt_path, txt_path.name)
        zipf.write(srt_path, srt_path.name)
        zipf.write(json_path, json_path.name)

    return str(txt_path), str(srt_path), str(json_path), str(zip_path)
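# Example: save_files(text, srt, json_data, "talk.mp3") writes
# transcriptions/talk.txt, talk.srt, talk.json, and talk_all.zip, and
# returns the four paths as strings for Gradio's File components.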


# Load the model once at startup rather than on every request; loading
# "large-v3-turbo" takes several seconds and over a gigabyte of memory.
model = whisper.load_model(
    "large-v3-turbo",
    device="cuda" if torch.cuda.is_available() else "cpu",
)


@spaces.GPU
def transcribe(audio_file):
    """Transcribe an audio file and return download paths plus raw outputs."""
    # Run Whisper on the uploaded audio
    result = model.transcribe(audio_file)

    # Plain-text transcript
    text_output = result["text"]

    # Full result (text, segments, detected language) as pretty-printed JSON
    json_output = json.dumps(result, indent=2, ensure_ascii=False)

    # SRT: numbered cues, each with start/end timestamps and the segment text
    srt_output = ""
    for i, segment in enumerate(result["segments"], 1):
        start_time = format_timestamp(segment["start"])
        end_time = format_timestamp(segment["end"])
        text = segment["text"].strip()
        srt_output += f"{i}\n{start_time} --> {end_time}\n{text}\n\n"

    # Save the files and collect their paths
    txt_file, srt_file, json_file, zip_file = save_files(
        text_output, srt_output, json_output,
        os.path.basename(audio_file),
    )
    return (
        txt_file, srt_file, json_file, zip_file,
        text_output, srt_output, json_output,
    )
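# The seven values returned by transcribe() map one-to-one, in order,
# onto the seven components in the `outputs` list below.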

# Create the Gradio interface
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(type="filepath", label="Upload Audio"),
    outputs=[
        gr.File(label="Download TXT"),
        gr.File(label="Download SRT"),
        gr.File(label="Download JSON"),
        gr.File(label="Download All (ZIP)"),
        gr.Textbox(label="Transcription", lines=5),
        gr.Textbox(label="SRT Format"),
        gr.JSON(label="JSON Output"),
    ],
    title="Audio Transcription with Whisper",
    description=(
        "Upload an audio file to transcribe it into text, SRT, and JSON "
        "formats using OpenAI's Whisper model. You can download the results "
        "in different formats or get everything in a ZIP file."
    ),
)

if __name__ == "__main__":
    demo.launch()
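    # When running locally (outside of Spaces), demo.launch(share=True)
    # also serves the app through a temporary public Gradio link.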