Spaces:

PirateXX
/

Youtube-Video-Summarizer

Running

App Files Files Community

PirateXX committed on Jan 15

Commit

3dfb32c

verified ·

1 Parent(s): 7586839

Create app.py

Browse files

Files changed (1) hide show

app.py +130 -0

app.py ADDED Viewed

	@@ -0,0 +1,130 @@

+# imports
+import os
+import re
+from functools import lru_cache
+
+import gradio as gr
+import requests
+import torch
+from dotenv import load_dotenv
+from IPython.display import Markdown
+from openai import OpenAI
+from transformers import pipeline
+from youtube_transcript_api import YouTubeTranscriptApi
+# Module-level OpenAI client, constructed at import time.
+# NOTE(review): nothing else in this file references `openai`; presumably the
+# constructor needs API credentials in the environment — confirm, and drop it if unused.
+openai = OpenAI()
+class YoutubeVideoID:
+    """
+    Pairs a YouTube URL with the 11-character video ID parsed from it.
+    :param url: str, a YouTube video URL (watch, youtu.be, embed, shorts, live or /v/ form)
+    :raises ValueError: if no video ID can be extracted from the URL
+    """
+    # Compiled once for the class instead of on every call. Compared with the
+    # previous pattern this also accepts the m./music. sub-domains, the
+    # /embed/, /v/, /shorts/ and /live/ paths, and a `v=` parameter that is
+    # not the first query argument (e.g. `?feature=share&v=...`).
+    _VIDEO_ID_PATTERN = re.compile(
+        r"(?:https?://)?(?:(?:www|m|music)\.)?"
+        r"(?:youtube\.com/(?:[^/\n\s]+/\S+/|(?:v|e(?:mbed)?|shorts|live)/|\S*?[?&]v=)"
+        r"|youtu\.be/)"
+        r"([a-zA-Z0-9_-]{11})"
+    )
+    def __init__(self, url):
+        self.url = url
+        self.video_id = self.extract_video_id(url)
+    def extract_video_id(self, url):
+        """
+        Extracts the YouTube video ID from a given URL.
+        Supports regular, shortened, embed, shorts and live URLs; leading and
+        trailing whitespace (common in pasted input) is ignored.
+        :param url: str, the URL to parse
+        :return: str, the 11-character video ID
+        :raises ValueError: if the URL is not a string or is not a recognised YouTube video URL
+        """
+        # Non-string input (e.g. None from an empty form field) is reported as
+        # an invalid URL rather than leaking a TypeError from the regex engine.
+        if isinstance(url, str):
+            match = self._VIDEO_ID_PATTERN.match(url.strip())
+            if match:
+                return match.group(1)
+        raise ValueError("Invalid YouTube URL")
+    def __str__(self):
+        return f"Video ID: {self.video_id}"
+def get_transcript(video_id, language='en'):
+    """
+    Fetches a video's transcript and flattens it into one string.
+    :param video_id: str, the YouTube video ID
+    :param language: str, language code of the transcript to request (default 'en', i.e. English)
+    :return: str with every segment's 'text' joined by single spaces, or None if
+             anything fails (the error is printed, not raised)
+    """
+    joined = None
+    try:
+        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
+        # Each segment is indexed by 'text'; only that field is kept
+        joined = " ".join(segment['text'] for segment in segments)
+    except Exception as e:
+        print(f"Error fetching transcript: {e}")
+    return joined
+@lru_cache(maxsize=1)
+def _get_summarizer():
+    """Builds the summarization pipeline once so every chunk reuses the loaded model."""
+    return pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)
+def summarize_text(text):
+    """
+    Summarizes a piece of text with the distilbart-cnn summarization model.
+    :param text: str, the text to summarize
+    :return: str, the generated summary, or None if the text is empty or
+             summarization fails (the error is printed, not raised)
+    """
+    # Nothing to summarize: skip the model entirely
+    if not text or not text.strip():
+        return None
+    try:
+        summarizer = _get_summarizer()
+        # The original passed an undefined name (`input_text`) here, so every
+        # call failed; the function's own argument is what must be summarized.
+        # truncation=True keeps an over-long chunk from exceeding the model's input limit.
+        output = summarizer(text, truncation=True)
+        return output[0]['summary_text']
+    except Exception as e:
+        print(f"Error summarizing text: {e}")
+        return None
+def split_text(text, chunk_size=3000):
+    """
+    Splits large text into smaller chunks based on the given chunk size.
+    Chunks end with a full stop where possible to maintain sentence integrity;
+    when a window contains no full stop the text is cut at exactly chunk_size.
+    No chunk is ever longer than chunk_size characters.
+    :param text: str, the text to be split (None or empty yields an empty list)
+    :param chunk_size: int, maximum size of each chunk (default 3000 characters)
+    :return: list of str, where each str is a chunk of text
+    :raises ValueError: if chunk_size is not positive
+    """
+    # A non-positive size can never make progress and used to loop forever
+    if chunk_size <= 0:
+        raise ValueError("chunk_size must be a positive integer")
+    if not text:
+        return []
+    chunks = []
+    start = 0
+    total = len(text)
+    while total - start > chunk_size:
+        window_end = start + chunk_size
+        # Search strictly inside the window so the chunk, period included,
+        # stays within chunk_size. The old search reached one character further
+        # and then mistook a period found there for "no period found".
+        period = text.rfind('.', start, window_end)
+        end = period + 1 if period != -1 else window_end
+        # Intermediate chunks are kept verbatim (no stripping)
+        chunks.append(text[start:end])
+        start = end
+    # Add the remaining text as the final chunk, skipping a whitespace-only tail
+    tail = text[start:].strip()
+    if tail:
+        chunks.append(tail)
+    return chunks
+def get_result(video_url, summarize=True):
+    """
+    Produces either the full transcript or a summary for a YouTube video.
+    :param video_url: str, the YouTube video URL
+    :param summarize: bool, True to return a summary, False to return the raw transcript
+    :return: the transcript str when summarize is False; otherwise an
+             IPython Markdown object wrapping the joined chunk summaries.
+             None when the transcript could not be fetched or no chunk could be summarized.
+    :raises ValueError: if video_url is not a valid YouTube URL
+    """
+    # Fetch transcript using the video ID
+    yt_video = YoutubeVideoID(video_url)
+    transcript_text = get_transcript(yt_video.video_id)
+    # Also return early when the transcript is missing: there is nothing to
+    # split, and passing None on used to crash inside split_text.
+    if not summarize or not transcript_text:
+        return transcript_text
+    chunk_summaries = (summarize_text(chunk) for chunk in split_text(transcript_text))
+    # summarize_text returns None for a failed chunk; dropping those keeps
+    # one bad chunk from breaking the join for the whole video.
+    summaries = [summary for summary in chunk_summaries if summary]
+    if not summaries:
+        return None
+    return Markdown(" ".join(summaries))
+def get_youtube_transcript(video_url, output_type):
+    """
+    Click handler for the Generate button: turns the form values into display text.
+    :param video_url: str, contents of the URL textbox
+    :param output_type: str, the selected radio choice ("Summary" or "Full Transcript")
+    :return: str, the summary/transcript, or a human-readable error message
+    """
+    if not video_url or not video_url.strip():
+        return "Please enter a YouTube URL."
+    try:
+        result = get_result(video_url, summarize=(output_type == "Summary"))
+    except ValueError as e:
+        # Raised for URLs no video ID can be extracted from
+        return f"Error: {e}"
+    if result is None:
+        return "Could not fetch or summarize the transcript for this video."
+    # Summaries come back wrapped in an IPython Markdown object; the Textbox
+    # needs the plain text, which that object keeps in `.data`.
+    return getattr(result, "data", result)
+def create_gradio_interface():
+    """
+    Builds the Gradio Blocks UI: a URL textbox, an output-type radio, a result
+    textbox and a Generate button wired to get_youtube_transcript.
+    :return: the gr.Blocks app, not yet launched
+    """
+    with gr.Blocks() as demo:
+        gr.Markdown("# YouTube Video Summarizer")
+        gr.Markdown("""
+        This space provides summary of youtube video urls, you can also get full transcripts if you choose so.
+        ### Credits:
+        Created by **Arsh** – Providing a simple solution for video summarization!
+        """)
+        # Input for YouTube URL
+        video_url_input = gr.Textbox(label="Enter YouTube URL", lines=1)
+        # Radio button for choosing output type (summary or full transcript)
+        output_type = gr.Radio(choices=["Summary", "Full Transcript"], label="Choose Output Type", value="Summary")
+        # Output for summarized or full transcript text
+        output_text = gr.Textbox(label="Result", lines=6)
+        # Submit button
+        submit_button = gr.Button("Generate", variant="primary")
+        # Define the action for the button press. The handler named here was
+        # previously undefined, which made building the UI fail.
+        submit_button.click(fn=get_youtube_transcript,
+                            inputs=[video_url_input, output_type],
+                            outputs=[output_text])
+    return demo
+# Build the interface at module level and start the Gradio server.
+# NOTE(review): share=True and show_api=True are passed to launch() as-is;
+# confirm both are still wanted when this runs as a hosted Space.
+demo = create_gradio_interface()
+demo.launch(share=True, show_api=True)