File size: 2,661 Bytes
2e7a962
c657e66
8365795
 
 
a8569f1
8365795
 
c657e66
8365795
c657e66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8365795
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c657e66
8365795
 
 
 
 
 
 
 
 
 
 
 
 
aafc9f3
8365795
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import html
import re
import urllib.request

import gradio as gr
import torch
import transformers
import youtube_dl
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import pipeline

# Load the Falcon-7B checkpoint and wrap it in a text-generation pipeline.
# NOTE(review): the previous comment here named 'falcon-7b-instruct', but the
# checkpoint actually loaded is the base model below -- confirm which one is
# intended before changing the identifier.
model = "tiiuae/falcon-7b"

tokenizer = AutoTokenizer.from_pretrained(model)

# NOTE: this deliberately rebinds the module-level name `pipeline` (also
# imported from transformers above) to the constructed pipeline object;
# generate_text() looks the object up under this name.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
)

# The pipeline is intentionally NOT invoked here: calling it without a prompt
# raises TypeError at import time. Generation happens in generate_text().

def generate_text(text, max_new_tokens=200):
    """Generate a sampled continuation of *text* with the module-level pipeline.

    Args:
        text: Prompt string handed to the text-generation pipeline.
        max_new_tokens: Upper bound on the number of tokens generated *after*
            the prompt. A budget for new tokens is used instead of a total
            ``max_length`` so that a long prompt cannot consume the whole
            allowance and leave nothing to generate.

    Returns:
        The pipeline output: a list with one dict per returned sequence, each
        carrying the text under the ``'generated_text'`` key. An empty list is
        returned for an empty prompt, without invoking the model.
    """
    if not text:
        return []
    return pipeline(
        text,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
# Function to extract YouTube transcript given a video URL
def extract_youtube_transcript(url):
    """Return the URL of the English WebVTT subtitle track for a video.

    Args:
        url: Video URL passed to youtube_dl for metadata extraction.

    Returns:
        The subtitle file URL as a string, or ``None`` when the URL is empty,
        metadata extraction fails, or no English ``vtt`` track is listed.
    """
    if not url:
        return None

    ydl_opts = {
        'writesubtitles': True,
        'subtitleslangs': ['en'],
        'skip_download': True,
        'ignoreerrors': True,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        # Best-effort lookup: report the failure and signal "no transcript".
        print(f"An error occurred while fetching the transcript: {e}")
        return None

    # With 'ignoreerrors' enabled, extract_info may return None instead of
    # raising when extraction fails.
    if not info:
        return None

    # 'subtitles' maps a language code to a list of format dicts (each with
    # an 'ext' and usually a 'url'), so look up the language first and then
    # scan its formats.
    english_tracks = (info.get('subtitles') or {}).get('en') or []
    for track in english_tracks:
        if track.get('ext') == 'vtt' and track.get('url'):
            return track['url']
    return None

# Gradio handler and its helpers: YouTube URL in, generated summary out.
_FALLBACK_MESSAGE = "Unable to fetch or process the transcript."
_SUMMARY_PROMPT = "Write a summary of: "
# Rough character cap on the transcript placed in the prompt.
# NOTE(review): heuristic guard against overrunning the model's context
# window -- tune against the tokenizer/model actually deployed.
_MAX_TRANSCRIPT_CHARS = 4000
_VTT_TAG_RE = re.compile(r"<[^>]+>")


def _download_text(url, timeout=30):
    """Fetch *url* over HTTP(S) and return the decoded response body."""
    if not url.startswith(("http://", "https://")):
        # Refuse file:// and other schemes; the URL comes from remote metadata.
        raise ValueError(f"Unsupported transcript URL scheme: {url!r}")
    with urllib.request.urlopen(url, timeout=timeout) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        return response.read().decode(charset, errors="replace")


def _vtt_to_text(vtt):
    """Reduce a WebVTT document to its cue text joined into a single string."""
    normalized = vtt.replace("\r\n", "\n").replace("\r", "\n")
    spoken = []
    for block in re.split(r"\n\s*\n", normalized):
        rows = block.split("\n")
        # Only blocks with a timing line ("start --> end") are cues; the file
        # header and NOTE/STYLE blocks have none and are skipped.
        timing_row = next(
            (i for i, row in enumerate(rows) if "-->" in row), None
        )
        if timing_row is None:
            continue
        for row in rows[timing_row + 1:]:
            line = html.unescape(_VTT_TAG_RE.sub("", row)).strip()
            # Drop immediate repeats, which rolling captions commonly emit.
            if line and (not spoken or spoken[-1] != line):
                spoken.append(line)
    return " ".join(spoken)


def summarize_youtube_transcript(url):
    """Summarize the English transcript of the YouTube video at *url*.

    Args:
        url: Video URL as entered by the user.

    Returns:
        The generated summary text, or the fixed message
        "Unable to fetch or process the transcript." when the URL is empty,
        no transcript is available, or any processing step fails.
    """
    if not url or not url.strip():
        return _FALLBACK_MESSAGE

    transcript_url = extract_youtube_transcript(url.strip())
    if not transcript_url:
        return _FALLBACK_MESSAGE

    try:
        # The subtitle URL points at a plain WebVTT file, so download and
        # parse it directly rather than running the extractor on it.
        transcript = _vtt_to_text(_download_text(transcript_url))
        if not transcript:
            return _FALLBACK_MESSAGE
        prompt = _SUMMARY_PROMPT + transcript[:_MAX_TRANSCRIPT_CHARS]
        sequences = generate_text(prompt)
        if not sequences:
            return _FALLBACK_MESSAGE
        # Text-generation pipelines report output under 'generated_text'.
        generated = sequences[0]['generated_text']
    except Exception as e:
        # UI boundary: report the failure and show the fallback message.
        print(f"An error occurred while processing the transcript: {e}")
        return _FALLBACK_MESSAGE

    # The generated text may echo the prompt; keep only the continuation.
    if generated.startswith(prompt):
        generated = generated[len(prompt):]
    return generated.strip() or _FALLBACK_MESSAGE

# Build the web UI: one text box for the video URL, one for the summary.
# Components are taken from the top-level `gr` namespace; the older
# `gr.inputs` / `gr.outputs` namespaces are deprecated.
# `use_auth_token` is not passed because it is not a gr.Interface
# constructor option.
iface = gr.Interface(
    fn=summarize_youtube_transcript,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Transcript Summarizer",
    description="Enter a YouTube URL and get a summary of the transcript.",
    theme="huggingface",
)

# Launch the interface
iface.launch(share=True)