# Hugging Face Space application source.
# Space status shown when this page was captured: "Runtime error".
import html
import re
from urllib.parse import urlparse
from urllib.request import urlopen

import gradio as gr
import torch
import transformers
import youtube_dl
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

# Load the Falcon-7B causal language model used to generate summaries.
# NOTE(review): this section was originally labelled 'falcon-7b-instruct',
# but the checkpoint actually loaded is the base "tiiuae/falcon-7b" --
# confirm which of the two is intended.
model = "tiiuae/falcon-7b"
tokenizer = AutoTokenizer.from_pretrained(model)

# This rebinds the module-level name `pipeline` (also imported from
# transformers) to the constructed text-generation pipeline object;
# generate_text() calls the pipeline through this name.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

# The pipeline is intentionally NOT invoked here. A text-generation pipeline
# needs a prompt as its first argument, so calling it with sampling options
# only fails while the module is being imported. Generation happens on
# demand inside generate_text().
def generate_text(text):
    """Generate a sampled continuation of ``text`` with the Falcon pipeline.

    Args:
        text: Prompt string passed to the module-level text-generation
            ``pipeline``.

    Returns:
        Whatever the pipeline returns for the prompt. For a single prompt
        the text-generation pipeline yields a list with one dict per
        returned sequence, the text being stored under ``"generated_text"``.
    """
    # max_new_tokens limits only the continuation. The previous max_length
    # budget also counted the prompt tokens, so a prompt longer than the
    # budget left no room for any generated text.
    return pipeline(
        text,
        max_new_tokens=200,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )


# Function to extract YouTube transcript given a video URL
def extract_youtube_transcript(url):
    """Return the URL of the English WebVTT subtitle track of a video.

    Args:
        url: Video page URL handed to youtube_dl for metadata extraction.

    Returns:
        The subtitle file URL as a string, or None when the metadata could
        not be extracted, the video has no English subtitles, or none of
        the English tracks is in 'vtt' format.
    """
    ydl_opts = {
        'writesubtitles': True,
        'subtitleslangs': ['en'],
        'skip_download': True,
        'ignoreerrors': True,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
        # With 'ignoreerrors' enabled, a failed extraction yields None
        # instead of raising, so guard before reading from the result.
        subtitles = (info or {}).get('subtitles') or {}
        # 'subtitles' maps a language code to a list of track dicts (each
        # with 'ext' and 'url'); iterating the mapping itself would only
        # produce the language-code strings.
        for track in subtitles.get('en') or []:
            if track.get('ext') == 'vtt' and track.get('url'):
                return track['url']
    except Exception as e:
        # Best-effort lookup: report the problem and fall through to None
        # so the caller can show its own failure message.
        print(f"An error occurred while fetching the transcript: {e}")
    return None


# Matches inline WebVTT markup such as <c>, </c> or <00:00:01.000>.
_VTT_TAG_RE = re.compile(r"<[^>]+>")


def _vtt_to_text(vtt):
    """Reduce the text of a WebVTT file to its caption text on one line.

    Everything before the first cue timing line is treated as header and
    dropped, as are the timing lines themselves, blank lines, inline tags
    and caption lines that merely repeat the previous one.
    NOTE(review): cue identifier lines after the first cue are not
    recognised and would be kept as caption text.
    """
    captions = []
    inside_cues = False
    for raw_line in vtt.splitlines():
        if "-->" in raw_line:
            inside_cues = True
            continue
        if not inside_cues:
            continue
        line = html.unescape(_VTT_TAG_RE.sub("", raw_line)).strip()
        if not line:
            continue
        if captions and captions[-1] == line:
            continue
        captions.append(line)
    return " ".join(captions)


# Gradio callback: summarize the transcript of a YouTube video
def summarize_youtube_transcript(url):
    """Summarize the English subtitles of the video at ``url``.

    Args:
        url: YouTube video URL entered by the user.

    Returns:
        The generated summary text, or the fixed message
        "Unable to fetch or process the transcript." when no subtitle track
        is found, the download fails, or no text is generated.
    """
    failure_message = "Unable to fetch or process the transcript."

    transcript_url = extract_youtube_transcript(url)
    if not transcript_url:
        return failure_message

    # Fetch the transcript
    try:
        # Only plain web URLs are fetched; urlopen would otherwise also
        # accept schemes such as file://.
        if urlparse(transcript_url).scheme not in ("http", "https"):
            raise ValueError(f"unsupported transcript URL: {transcript_url!r}")
        # The subtitle URL points at a caption file, not a video page, so
        # it is downloaded directly instead of going through youtube_dl.
        with urlopen(transcript_url, timeout=30) as response:
            vtt = response.read().decode("utf-8", errors="replace")

        transcript = _vtt_to_text(vtt)
        if not transcript:
            return failure_message

        # NOTE(review): the full transcript is sent as one prompt; long
        # videos may exceed the model's context window -- confirm whether
        # truncation or chunking is wanted.
        prompt = "Write a summary of: " + transcript
        result = generate_text(prompt)
        if not result:
            return failure_message

        # The text-generation pipeline stores its output under
        # 'generated_text' and by default echoes the prompt first; strip
        # the echo so only the summary is shown.
        generated = result[0]['generated_text']
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        return generated.strip()
    except Exception as e:
        # UI boundary: log the problem and show the generic message rather
        # than surfacing a traceback in the interface.
        print(f"An error occurred while processing the transcript: {e}")
    return failure_message


# Create the Gradio interface
# Components are created with the top-level gr.Textbox class; the older
# gr.inputs / gr.outputs namespaces are deprecated and absent from newer
# Gradio releases.
# `use_auth_token` is not passed here: it is not a gr.Interface option.
# NOTE(review): if the model needs Hugging Face Hub authentication, it
# presumably belongs on the model/tokenizer loading calls -- confirm.
iface = gr.Interface(
    fn=summarize_youtube_transcript,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Transcript Summarizer",
    description="Enter a YouTube URL and get a summary of the transcript.",
    # NOTE(review): "huggingface" is a legacy theme name; verify it is still
    # recognised by the installed Gradio version.
    theme="huggingface",
)
# Launch the interface
iface.launch(share=True)