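"""Gradio app that transcribes an uploaded audio file with Whisper, summarizes the
transcript with a Hugging Face Pegasus model, and saves the results to a CSV file."""
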
import gradio as gr
import whisper    # Library for speech recognition
from transformers import pipeline
import pandas as pd


# Load the Whisper model for speech recognition
whisper_model = whisper.load_model("base")

# Load the summarization model from Hugging Face
summarizer = pipeline("summarization", model="google/pegasus-large")

def process_audio(audio_file, min_length, max_length):
    try:
        # Ensure audio_file is not None and has valid content
        if audio_file is None:
            raise ValueError("No audio file provided.")

        # Use the Whisper model to transcribe the audio file into text
        result = whisper_model.transcribe(audio_file)
        text = result['text']

        # Check if transcription was successful
        if not text:
            raise ValueError("Failed to transcribe the audio. The transcription result is empty.")

        # Use the summarization pipeline to summarize the transcribed text
        # (slider values are cast to int, since generation lengths must be integers)
        summary_result = summarizer(text, min_length=int(min_length), max_length=int(max_length))
        summary = summary_result[0]['summary_text']

        # Check if summarization was successful
        if not summary:
            raise ValueError("Failed to summarize the transcript. The summary result is empty.")

        # Create a DataFrame to store the audio file, transcript, and summary
        df_results = pd.DataFrame({
            "Audio File": [audio_file],  # Store the path to the audio file
            "Transcript": [text],       # Store the transcribed text
            "Summary": [summary]        # Store the generated summary
        })

        # Save the results to a CSV file named "results.csv"
        df_results.to_csv("results.csv", index=False)

        # Return the transcript and summary to be displayed in the Gradio interface
        return text, summary

    except Exception as e:
        # General error handling
        error_message = f"An error occurred: {str(e)}"
        return error_message, error_message

# Create a Gradio interface
iface = gr.Interface(
    fn=process_audio,  # The function to be called when processing the input
    inputs=[
        gr.Audio(sources=["upload"], type="filepath", label="Upload your audio file"),  # Audio input field for file upload
        gr.Slider(minimum=10, maximum=50, value=30, step=1, label="Minimum Summary Length"),   # Slider for setting minimum summary length
        gr.Slider(minimum=50, maximum=600, value=100, step=1, label="Maximum Summary Length")  # Slider for setting maximum summary length
    ],
    outputs=[
        gr.Textbox(label="Transcript"),  # Textbox for displaying the transcript
        gr.Textbox(label="Summary")      # Textbox for displaying the summary
    ],
    title="Audio to Summarized Transcript",  # Title of the app
    description="Upload an audio file and adjust summary length to get both the transcript and summary."  # Description of the app
)

# Launch the app
iface.launch()
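
# For a quick check without the web UI, the processing function can also be called
# directly, e.g. process_audio("sample.wav", 30, 100); "sample.wav" is only a
# placeholder path for an audio file on disk, not a file bundled with this app.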