import html
import re
import urllib.request

import gradio as gr
import torch
import transformers
import youtube_dl
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline

# Load the Falcon-7B model used to generate the summaries.
# NOTE(review): an earlier comment referred to 'falcon-7b-instruct' while the
# code loads the base checkpoint below; the instruct variant presumably
# follows a "Write a summary of" prompt better -- confirm which is intended.
model = "tiiuae/falcon-7b"

tokenizer = AutoTokenizer.from_pretrained(model)

# NOTE: this deliberately rebinds the name `pipeline` (imported above as the
# factory function) to the constructed text-generation pipeline instance.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

# Rough character budget for the transcript part of the prompt, so that the
# prompt plus the generated tokens stay inside the model's context window.
# NOTE(review): 4000 characters is a heuristic, not a measured token count.
_MAX_TRANSCRIPT_CHARS = 4000

# Matches inline WebVTT markup such as <c>, </c> or <00:00:01.500>.
_VTT_TAG_RE = re.compile(r"<[^>]+>")


def generate_text(text):
    """Generate a continuation of *text* with the module-level pipeline.

    Args:
        text: The full prompt to feed to the model.

    Returns:
        The pipeline output: a list with one dict whose ``'generated_text'``
        entry holds only the newly generated text (the prompt is excluded).
    """
    return pipeline(
        text,
        # Bound the *generated* tokens; a total `max_length` would be used up
        # by the prompt itself whenever the transcript is long.
        max_new_tokens=200,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        return_full_text=False,
    )


def _download_text(url, timeout=30):
    """Download *url* over HTTP(S) and return the body decoded as UTF-8."""
    if not url.startswith(("http://", "https://")):
        raise ValueError(f"Unsupported transcript URL: {url!r}")
    with urllib.request.urlopen(url, timeout=timeout) as response:
        return response.read().decode("utf-8", errors="replace")


def _vtt_to_text(vtt):
    """Extract the spoken text from a WebVTT document as a single string."""
    pieces = []
    in_cue = False
    for raw_line in vtt.splitlines():
        line = raw_line.strip()
        if not line:
            # A blank line terminates the current cue (or header/NOTE block).
            in_cue = False
        elif "-->" in line:
            # Timing line: the lines that follow are the cue payload.
            in_cue = True
        elif in_cue:
            caption = html.unescape(_VTT_TAG_RE.sub("", line)).strip()
            # Consecutive cues can repeat the same line; keep it once.
            if caption and (not pieces or pieces[-1] != caption):
                pieces.append(caption)
    return " ".join(pieces)


# Function to extract YouTube transcript given a video URL
def extract_youtube_transcript(url):
    """Return the URL of the English WebVTT subtitle track for a video.

    Manually provided subtitles are preferred; automatic captions are used as
    a fallback.

    Args:
        url: The YouTube video URL.

    Returns:
        The subtitle file URL, or ``None`` when the video metadata cannot be
        fetched or no English VTT track is listed.
    """
    ydl_opts = {
        'writesubtitles': True,
        'subtitleslangs': ['en'],
        'skip_download': True,
        'ignoreerrors': True,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        print(f"An error occurred while fetching the transcript: {e}")
        return None

    # With 'ignoreerrors' set, a failed extraction yields None, not a dict.
    if not info:
        return None

    # Both fields map a language code to a list of per-format dicts.
    for field in ('subtitles', 'automatic_captions'):
        tracks = (info.get(field) or {}).get('en') or []
        for track in tracks:
            if track.get('ext') == 'vtt' and track.get('url'):
                return track['url']
    return None


# Handler wired into the Gradio interface
def summarize_youtube_transcript(url):
    """Summarize the English transcript of the YouTube video at *url*.

    Args:
        url: The YouTube video URL entered by the user.

    Returns:
        The generated summary, or a fixed error message when the transcript
        cannot be fetched or processed.
    """
    transcript_url = extract_youtube_transcript(url)
    if transcript_url:
        # Fetch the transcript
        try:
            transcript = _vtt_to_text(_download_text(transcript_url))
            if transcript:
                prompt = (
                    "Write a summary of: "
                    f"{transcript[:_MAX_TRANSCRIPT_CHARS]}"
                )
                summary = generate_text(prompt)
                return summary[0]['generated_text'].strip()
        except Exception as e:
            print(f"An error occurred while processing the transcript: {e}")
    return "Unable to fetch or process the transcript."
# Create the Gradio interface: one text box for the video URL, one for the
# generated summary.
iface = gr.Interface(
    fn=summarize_youtube_transcript,
    # Components are taken from the top-level namespace; the legacy
    # `gr.inputs` / `gr.outputs` namespaces are not available in current
    # Gradio releases.
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Transcript Summarizer",
    description="Enter a YouTube URL and get a summary of the transcript.",
    # NOTE(review): confirm the installed Gradio version still ships a theme
    # with this name.
    theme="huggingface",
)

# Launch the interface and request a public share link.
iface.launch(share=True)