# Hugging Face Space: Ritvik19 / VidScripter (status: Sleeping)
# File size: 9,264 Bytes

import base64
import html
import os
import re
import tempfile
from urllib.parse import unquote, urlparse

import ffmpeg
import requests
import streamlit as st
import whisper
from huggingface_hub import InferenceClient
from moviepy.editor import VideoFileClip
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import CouldNotRetrieveTranscript

# Page-wide Streamlit settings: wide layout, sidebar collapsed on first load.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

# Instruction template for the summarising LLM. The single "{}" placeholder
# near the end is filled with the transcript through str.format() (see
# summarise), so the text must not contain any other curly braces.
PROMPT = """Act as the author and provide a comprehensive detailed article in the same language as the transcript
        in markdown format that has a H1 main title(example "# <this is a title> ") and broken down into H2 subtitles (example "## <this is a title> ") for the following transcript
You must follow the rules:
  
- Write the article in markdown format
- Create a main title for the article as markdown H1 and break the article into subtitles where each subtitle is markdown H2
- Article must be in the same language as the transcript
- summary should be informative and act as a replacement for the original transcript to the point that the user doesn't have to go back to read the transcript
- Summary should not mention the author or speaker at all should act as your independent writing without referencing the original transcript or speaker. 
- You can use bullet points within the article
Transcript:
 {} \n\n Article:"""


# Registry of selectable LLMs; the keys are the names offered in the UI
# select box. Each entry holds:
#   'prompt'   - PROMPT surrounded by the model's special header tokens. The
#                f-string interpolates PROMPT verbatim, so PROMPT's "{}"
#                placeholder survives and is filled later via str.format().
#   'endpoint' - model identifier handed to InferenceClient.
LLM = {
    "llama3-8b": {'prompt': f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{PROMPT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>


""", 'endpoint': "meta-llama/Meta-Llama-3-8B-Instruct"}
}

@st.cache_resource()
def load_whisper(model):
    """Load a Whisper model by name and return it.

    Decorated with ``st.cache_resource`` so the loaded model object is
    reused instead of being loaded again on every call.

    Args:
        model: Model name forwarded to ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model``.
    """
    loaded_model = whisper.load_model(model)
    return loaded_model


@st.cache_data
def download_video(url):
    """Download the video behind *url* into the current working directory.

    YouTube links (URL contains ``youtube`` or ``youtu.be``) are fetched with
    pytube; every other URL is streamed over HTTP with ``requests``.

    Args:
        url: Address of the video to download.

    Returns:
        Name/path of the file that was written.

    Raises:
        ValueError: pytube reported no downloadable stream for the video.
        requests.RequestException: the HTTP download failed, timed out or
            returned an error status.
    """
    if "youtube" in url or "youtu.be" in url:
        stream = YouTube(url).streams.get_highest_resolution()
        if stream is None:
            raise ValueError(f"No downloadable stream found for {url}")
        return stream.download()

    # Use only the URL *path* for the file name so that query strings and
    # fragments ("video.mp4?token=...") do not end up in the name, and fall
    # back to a fixed name when the path has no usable last component.
    filename = os.path.basename(unquote(urlparse(url).path))
    if filename in ("", ".", ".."):
        filename = "video.mp4"

    # The context manager releases the connection even if writing fails; the
    # timeout keeps a stalled server from hanging the app indefinitely.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail loudly instead of saving an HTML error page as the "video".
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:
                    file.write(chunk)
    return filename


@st.cache_data
def convert_to_audio(video_filename):
    """Extract the audio track of a video file into an MP3 next to it.

    Args:
        video_filename: Path of the video file to read.

    Returns:
        Path of the MP3 file that was written.

    Raises:
        ValueError: the video has no audio track.
    """
    # Swap only the real extension. A plain str.replace(".mp4", ".mp3")
    # leaves names such as "clip.webm" unchanged, which would make the audio
    # export overwrite the source video.
    root, ext = os.path.splitext(video_filename)
    if ext.lower() == ".mp3":
        # Input already ends in .mp3: pick a different output name so the
        # source file is never clobbered.
        audio_filename = f"{root}.audio.mp3"
    else:
        audio_filename = f"{root}.mp3"

    video = VideoFileClip(video_filename)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError(f"'{video_filename}' has no audio track")
        audio.write_audiofile(audio_filename, codec="mp3")
    finally:
        # Release the reader resources held by the clip.
        video.close()
    return audio_filename


@st.cache_data
def summarise(prompt, llm):
    """Generate an article-style summary of a transcript with the chosen LLM.

    Args:
        prompt: Transcript text substituted into the model's prompt template.
        llm: Key into ``LLM`` selecting the endpoint and prompt template.

    Returns:
        The value returned by ``InferenceClient.text_generation`` for the
        filled-in prompt (generation capped at 1024 new tokens).
    """
    settings = LLM[llm]
    client = InferenceClient(settings["endpoint"])
    filled_prompt = settings["prompt"].format(prompt)
    return client.text_generation(filled_prompt, max_new_tokens=1024)
    


def delete_files(video_filename, audio_filename):
    """Delete the video file and then the audio file via ``delete_file``.

    Args:
        video_filename: Path of the video file to delete.
        audio_filename: Path of the audio file to delete.
    """
    for path in (video_filename, audio_filename):
        delete_file(path)


def delete_file(filename):
    """Delete *filename* from disk and report it in the UI.

    Does nothing when *filename* is ``None``/empty or the file does not
    exist. Elsewhere in this file ``None`` stands for "no file was produced",
    so it must not raise here.

    Args:
        filename: Path of the file to remove, or ``None``.
    """
    if not filename:
        return
    try:
        # Remove directly instead of checking os.path.exists() first, which
        # would be racy if the file disappears between the check and removal.
        os.remove(filename)
    except FileNotFoundError:
        return
    st.info(f"File '{os.path.basename(filename)}' deleted from the server.")


@st.cache_data
def transcribe_whisper(_model, audio_filepath):
    """Transcribe an audio file with the given Whisper model.

    Args:
        _model: Object exposing ``transcribe(path)``. The leading underscore
            is Streamlit's convention for excluding an argument from
            ``st.cache_data`` hashing.
        audio_filepath: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the transcription result.
    """
    result = _model.transcribe(audio_filepath)
    return result["text"]


def get_media_download_link(media_type, file_path):
    """Render an HTML download link that embeds a file as a base64 data URI.

    The whole file is read into memory and base64-encoded into the link.

    Args:
        media_type: Subtype placed in the data URI (``file/<media_type>``).
        file_path: Path of the file to offer for download.
    """
    # Read and encode first so the file handle is closed before rendering.
    with open(file_path, "rb") as file:
        encoded = base64.b64encode(file.read()).decode("utf-8")

    # The file name originates from a user-supplied URL or a video title and
    # is injected into raw HTML (unsafe_allow_html=True), so escape it to keep
    # quotes or tags in the name from breaking out of the attribute.
    safe_name = html.escape(os.path.basename(file_path), quote=True)
    media_href = f"data:file/{media_type};base64,{encoded}"
    st.markdown(
        f'<a href="{media_href}" download="{safe_name}">Download {safe_name}</a>',
        unsafe_allow_html=True,
    )


@st.cache_data
def generate_summaries(_summarizer, text, min_length=50, max_length=500):
    """Summarise *text* paragraph by paragraph and join the results.

    Paragraphs are separated by blank lines (``"\\n\\n"``). Blank or
    whitespace-only paragraphs are skipped so the summarizer is never called
    on empty input; for empty *text* the result is ``""``.

    Args:
        _summarizer: Callable invoked as
            ``_summarizer(paragraph, max_length=..., min_length=...,
            do_sample=False)``; it must return a sequence whose first item
            has a ``"summary_text"`` entry. Presumably a Hugging Face
            summarization pipeline - confirm against the caller.
        text: Text to summarise.
        min_length: Forwarded to the summarizer as ``min_length``.
        max_length: Forwarded to the summarizer as ``max_length``.

    Returns:
        The stripped per-paragraph summaries joined by blank lines.
    """
    summaries = []
    for paragraph in text.split("\n\n"):
        if not paragraph.strip():
            # Consecutive or trailing blank lines produce empty paragraphs.
            continue
        result = _summarizer(
            paragraph, max_length=max_length, min_length=min_length, do_sample=False
        )
        summaries.append(result[0]["summary_text"].strip())
    return "\n\n".join(summaries)


def main():
    """Render the landing page and start processing once a URL is fetched.

    The left column shows usage notes; the right column holds the URL input,
    the LLM selector and the **Fetch** button. ``load_state`` in the session
    state remembers that Fetch was pressed, so later reruns keep showing the
    processing section.
    """
    st.title("VidScripter")
    st.write("#### A One Stop Solution to Video Transcription")

    usage_notes = """
        - Enter the video URL in the text input box.
        - Click the **Fetch** button to fetch the video.
        - Once the video is fetched, you can perform the following actions:
            - Fetch transcript from YouTube API (if available) by clicking the **Fetch Transcript** button.
            - Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
        - The transcript will be displayed in a text area below.
        - A summary of the transcript will also be generated by the selected LLM.
        - The summary will be displayed in a text area below.
        - You can download the video, audio, transcript or summary by clicking the respective download buttons.
        """
    info_col, input_col = st.columns(2)
    info_col.write(usage_notes)

    model = load_whisper("base")

    url = input_col.text_input("Enter the video URL")
    llm = input_col.selectbox("Select LLM", list(LLM), index=0)
    fetch_clicked = input_col.button("Fetch")
    st.session_state.setdefault("load_state", False)

    # Nothing to do until Fetch has been pressed at least once.
    if not (fetch_clicked or st.session_state.load_state):
        return
    st.session_state.load_state = True

    if url:
        process_video(url, model, llm)


def process_video(url, whisper_model, llm):
    """Fetch a video, extract its audio and render the transcript/download UI.

    Failures while downloading or converting are reported as warnings and
    leave the corresponding file name as ``None``; the remaining steps then
    degrade gracefully.

    Args:
        url: Address of the video (YouTube link or direct file URL).
        whisper_model: Loaded Whisper model used for local transcription.
        llm: Key into ``LLM`` selecting the model used for summarisation.
    """
    try:
        video_filename = download_video(url)
    except Exception:
        video_filename = None
        st.warning("Video could not be fetched")
    else:
        st.success("Video fetched successfully")

    audio_filename = None
    if video_filename is None:
        st.info("No Video to convert into Audio")
    else:
        try:
            audio_filename = convert_to_audio(video_filename)
        except Exception:
            st.warning("Audio could not be converted")
        else:
            st.success("Audio converted successfully")

    if video_filename is not None:
        # Swap the real extension; str.replace(".mp4", ".txt") would leave
        # names such as "clip.webm" unchanged.
        stem = os.path.splitext(os.path.basename(video_filename))[0]
        text_filename = f"{stem}.txt"
    else:
        text_filename = "transcript.txt"

    # Placeholder later filled with the transcript selector or a warning.
    emp = st.empty()

    col1, col2, col3, col4 = st.columns(4)

    if "youtube" in url or "youtu.be" in url:
        # Resolve the video id only for YouTube links. Constructing
        # YouTube(url) for any other URL raises and used to abort the whole
        # page before the download had even been attempted.
        try:
            video_id = YouTube(url).video_id
        except Exception as e:
            emp.warning(f"Could not determine the YouTube video id. {e}")
        else:
            process_youtube_video(video_id, col3, emp, text_filename, llm)

    process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)

    with col1:
        if video_filename is not None and st.button("Download Video"):
            with st.spinner("Encoding Video"):
                get_media_download_link("video", video_filename)

    with col2:
        if audio_filename is not None and st.button("Download Audio"):
            with st.spinner("Encoding Audio"):
                get_media_download_link("audio", audio_filename)


def process_youtube_video(video_id, col, emp, text_filename, llm):
    """Offer the YouTube-provided transcripts and summarise the chosen one.

    Args:
        video_id: YouTube video id whose transcripts are listed.
        col: Streamlit container that receives the **Fetch Transcript** button.
        emp: Streamlit placeholder used for the transcript selector, or for a
            warning when the transcripts cannot be retrieved.
        text_filename: Download name for the transcript; the summary download
            name is derived from it.
        llm: Key into ``LLM`` selecting the model used for summarisation.
    """
    try:
        transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))
        if not transcripts:
            return

        transcript_options = {
            f"{transcript.language} ({transcript.language_code})": transcript
            for transcript in transcripts
        }
        transcript_option = emp.selectbox(
            "Select a transcript", list(transcript_options)
        )
        selected_transcript = transcript_options[transcript_option]

        st.session_state.setdefault("api_transcript", False)
        # Stay in "fetched" mode on later reruns once the button was pressed.
        if not (col.button("Fetch Transcript") or st.session_state.api_transcript):
            return
        st.session_state.api_transcript = True

        # Collapse whitespace runs inside each caption chunk; one chunk per line.
        transcript_text = "\n".join(
            re.sub(r"\s+", " ", chunk["text"])
            for chunk in selected_transcript.fetch()
        )
    except CouldNotRetrieveTranscript:
        emp.warning("Could Not Retrieve API Transcripts for this video.")
        return
    except Exception as e:
        emp.warning(f"Error Fetching API Transcripts for this video. {e}")
        return

    # Give the summary its own download name so it does not collide with the
    # transcript download.
    stem, ext = os.path.splitext(text_filename)
    summary_filename = f"{stem}_summary{ext}"

    c1, c2 = st.columns(2)
    with c1:
        modified_text = st.text_area("Transcript", transcript_text, height=500)
        st.download_button("Download Transcript", modified_text, text_filename)
    with c2:
        # Report summarisation problems as such, not as transcript-fetch
        # errors, and keep the transcript itself on screen.
        try:
            summary = summarise(modified_text, llm)
        except Exception as e:
            st.warning(f"Could not generate a summary for this transcript. {e}")
        else:
            summarized_text = st.text_area(
                "Summarized Transcript", summary, height=500
            )
            st.download_button("Download Summary", summarized_text, summary_filename)


def process_whisper_transcript(whisper_model, audio_filename, col, text_filename):
    """Show the Whisper transcription controls for an extracted audio file.

    Does nothing when no audio file is available. Once the button has been
    pressed, ``whisper_transcript`` in the session state keeps the transcript
    visible on later reruns.

    Args:
        whisper_model: Model passed to ``transcribe_whisper``.
        audio_filename: Path of the audio file, or ``None`` if there is none.
        col: Streamlit container that receives the transcription button.
        text_filename: Download name for the transcript.
    """
    if audio_filename is None:
        return

    st.session_state.setdefault("whisper_transcript", False)
    requested = col.button("Transcribe (Whisper)")
    if not (requested or st.session_state.whisper_transcript):
        return

    st.session_state.whisper_transcript = True
    transcript = transcribe_whisper(whisper_model, audio_filename)
    edited_transcript = st.text_area("Transcript", transcript, height=500)
    st.download_button("Download", edited_transcript, text_filename)


# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()