File size: 9,264 Bytes
abadf6e
 
 
 
 
 
 
 
 
 
 
 
c19a8cc
abadf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19a8cc
 
 
 
 
 
 
 
 
abadf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19a8cc
 
 
 
 
abadf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19a8cc
abadf6e
 
 
 
 
 
 
 
c19a8cc
abadf6e
 
 
 
 
 
 
c19a8cc
abadf6e
 
c19a8cc
abadf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19a8cc
abadf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
c19a8cc
abadf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c19a8cc
abadf6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
import streamlit as st
import requests
import base64
import os
from moviepy.editor import VideoFileClip
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import CouldNotRetrieveTranscript
import whisper
import ffmpeg
import re
import tempfile
from huggingface_hub import InferenceClient

# Page-level Streamlit configuration: wide layout with the sidebar collapsed
# on first load.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

# Instruction text given to the LLM. The single `{}` placeholder after
# "Transcript:" is filled with the transcript by `str.format` in `summarise`
# (via the per-model template in `LLM`, which embeds this string).
PROMPT = """Act as the author and provide a comprehensive detailed article in the same language as the transcript
        in markdown format that has a H1 main title(example "# <this is a title> ") and broken down into H2 subtitles (example "## <this is a title> ") for the following transcript
You must follow the rules:
  
- Write the article in markdown format
- Create a main title for the article as markdown H1 and break the article into subtitles where each subtitle is markdown H2
- Article must be in the same language as the transcript
- summary should be informative and act as a replacement for the original transcript to the point that the user doesn't have to go back to read the transcript
- Summary should not mention the author or speaker at all should act as your independent writing without referencing the original transcript or speaker. 
- You can use bullet points within the article
Transcript:
 {} \n\n Article:"""


# Registry of selectable models. The keys are the options offered by the
# "Select LLM" box in `main`. For each entry:
#   'prompt'   - model-specific chat wrapper around PROMPT. The f-string inlines
#                PROMPT when this module is loaded, so PROMPT's `{}` placeholder
#                is still present for the later `str.format` call in `summarise`.
#   'endpoint' - model identifier passed to `InferenceClient` in `summarise`.
LLM = {
    "llama3-8b": {'prompt': f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>

{PROMPT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>


""", 'endpoint': "meta-llama/Meta-Llama-3-8B-Instruct"}
}

@st.cache_resource()
def load_whisper(model):
    """Load a Whisper model by name, cached through ``st.cache_resource``.

    Args:
        model: Model name forwarded to ``whisper.load_model``.

    Returns:
        The object returned by ``whisper.load_model`` for that name.
    """
    loaded_model = whisper.load_model(model)
    return loaded_model


@st.cache_data
def download_video(url):
    """Download the video at *url* into the current working directory.

    YouTube links (detected by "youtube"/"youtu.be" appearing in the URL) are
    fetched with pytube at the highest available progressive resolution. Any
    other URL is streamed over HTTP with ``requests``.

    Args:
        url: Address of the video to fetch.

    Returns:
        Path of the downloaded file.

    Raises:
        requests.HTTPError: The server answered a direct download with an
            error status.
        requests.RequestException: Connection problems or timeouts on a
            direct download.
        Exception: Whatever pytube raises for a YouTube download.
    """
    if "youtube" in url or "youtu.be" in url:
        stream = YouTube(url).streams.get_highest_resolution()
        return stream.download()

    # Function-scope import: only the direct-download branch needs it.
    from urllib.parse import unquote, urlparse

    # Derive the name from the URL *path* only, so a query string or fragment
    # never ends up in the file name; fall back to a fixed name when the path
    # has no final component (e.g. "https://host/").
    filename = os.path.basename(unquote(urlparse(url).path)) or "video.mp4"

    # The context manager releases the connection even if writing fails, and
    # the timeout stops a stalled server from hanging the app forever.
    with requests.get(url, stream=True, timeout=30) as response:
        # Without this an HTML error page would be saved as the "video".
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:  # skip keep-alive chunks
                    file.write(chunk)
    return filename


@st.cache_data
def convert_to_audio(video_filename):
    """Extract the audio track of a video file into an MP3 next to it.

    Args:
        video_filename: Path of the video file to read.

    Returns:
        Path of the MP3 file that was written.

    Raises:
        ValueError: The video has no audio track.
        Exception: Whatever moviepy raises while opening or encoding.
    """
    # Swap the real extension instead of replacing the literal ".mp4": for a
    # ".webm"/".mkv" input the old approach produced the *same* path and the
    # audio export overwrote the source video.
    root, _ = os.path.splitext(video_filename)
    audio_filename = f"{root}.mp3"
    if audio_filename == video_filename:
        # Input already ends in ".mp3"; never write over the source file.
        audio_filename = f"{root}_audio.mp3"

    video = VideoFileClip(video_filename)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError(f"'{video_filename}' has no audio track")
        audio.write_audiofile(audio_filename, codec="mp3")
    finally:
        # Release the reader resources held by the clip.
        video.close()
    return audio_filename


@st.cache_data
def summarise(prompt, llm):
    """Generate an article-style summary of a transcript with the chosen LLM.

    Args:
        prompt: Transcript text substituted into the model's prompt template.
        llm: Key into ``LLM`` selecting the endpoint and prompt template.

    Returns:
        The result of ``InferenceClient.text_generation`` for the filled-in
        prompt, limited to 1024 new tokens.
    """
    endpoint = LLM[llm]["endpoint"]
    client = InferenceClient(endpoint)
    template = LLM[llm]["prompt"]
    filled_prompt = template.format(prompt)
    return client.text_generation(filled_prompt, max_new_tokens=1024)
    


def delete_files(video_filename, audio_filename):
    """Delete the video file and then the audio file via ``delete_file``.

    Args:
        video_filename: Path of the video file to delete.
        audio_filename: Path of the audio file to delete.
    """
    for path in (video_filename, audio_filename):
        delete_file(path)


def delete_file(filename):
    """Delete *filename* if it exists and report the deletion in the UI.

    A missing file, ``None`` or an empty string is a silent no-op, so callers
    can pass the ``None`` placeholders used for failed downloads/conversions.

    Args:
        filename: Path of the file to delete, or ``None``/"" for nothing.

    Raises:
        OSError: The path exists but cannot be removed (e.g. it is a
            directory or permissions are insufficient).
    """
    if not filename:
        return
    try:
        # Try the removal directly instead of checking existence first, which
        # would leave a window for the file to vanish between check and delete.
        os.remove(filename)
    except FileNotFoundError:
        return
    st.info(f"File '{os.path.basename(filename)}' deleted from the server.")


@st.cache_data
def transcribe_whisper(_model, audio_filepath):
    """Transcribe an audio file and return only the transcript text.

    Args:
        _model: Object exposing ``transcribe(path)``; the leading underscore
            follows Streamlit's convention for arguments that the cache
            should not hash.
        audio_filepath: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the mapping returned by ``_model.transcribe``.
    """
    result = _model.transcribe(audio_filepath)
    return result["text"]


def get_media_download_link(media_type, file_path):
    """Render an HTML download link that embeds a file as a base64 data URI.

    Args:
        media_type: Label placed in the data URI's type field
            (callers pass "video" or "audio").
        file_path: Path of the file to embed; its base name is used as the
            suggested download name and as the link text.
    """
    # Function-scope import: only this helper needs it.
    from html import escape

    # Read and encode first so the file handle is closed before rendering.
    with open(file_path, "rb") as file:
        encoded = base64.b64encode(file.read()).decode("utf-8")

    media_href = f"data:file/{media_type};base64,{encoded}"
    # The name derives from a remote video title/URL and is injected into raw
    # HTML; escape it so quotes or tags cannot break out of the attribute.
    safe_name = escape(os.path.basename(file_path), quote=True)
    st.markdown(
        f'<a href="{media_href}" download="{safe_name}">Download {safe_name}</a>',
        unsafe_allow_html=True,
    )


@st.cache_data
def generate_summaries(_summarizer, text, min_length=50, max_length=500):
    """Summarise *text* one paragraph at a time and re-join the results.

    Args:
        _summarizer: Callable invoked as
            ``_summarizer(paragraph, max_length=..., min_length=...,
            do_sample=False)``; its first result must carry a
            ``"summary_text"`` entry.
        text: Input text; paragraphs are separated by a blank line.
        min_length: Forwarded to the summarizer as ``min_length``.
        max_length: Forwarded to the summarizer as ``max_length``.

    Returns:
        The stripped per-paragraph summaries joined by blank lines.
    """
    condensed = [
        _summarizer(
            chunk, max_length=max_length, min_length=min_length, do_sample=False
        )[0]["summary_text"].strip()
        for chunk in text.split("\n\n")
    ]
    return "\n\n".join(condensed)


def main():
    """Build the landing page and start processing once a URL is fetched.

    The left column shows usage instructions; the right column holds the URL
    input, the LLM selector and the **Fetch** button. The ``load_state``
    session flag remembers that Fetch was pressed so the results stay visible
    on later reruns.
    """
    st.title("VidScripter")
    st.write("#### A One Stop Solution to Video Transcription")
    instructions_col, controls_col = st.columns(2)
    instructions_col.write(
        """
        - Enter the video URL in the text input box.
        - Click the **Fetch** button to fetch the video.
        - Once the video is fetched, you can perform the following actions:
            - Fetch transcript from YouTube API (if available) by clicking the **Fetch Transcript** button.
            - Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
        - The transcript will be displayed in a text area below.
        - A summary of the transcript will also be generated by the selected LLM.
        - The summary will be displayed in a text area below.
        - You can download the video, audio, transcript or summary by clicking the respective download buttons.
        """
    )

    whisper_model = load_whisper("base")

    url = controls_col.text_input("Enter the video URL")
    llm = controls_col.selectbox("Select LLM", list(LLM), index=0)
    fetch_clicked = controls_col.button("Fetch")
    if "load_state" not in st.session_state:
        st.session_state["load_state"] = False

    # Nothing to do until Fetch has been pressed at least once.
    if not (fetch_clicked or st.session_state.load_state):
        return

    st.session_state.load_state = True
    if url:
        process_video(url, whisper_model, llm)


def process_video(url, whisper_model, llm):
    """Fetch a video, extract its audio and render the transcript/download UI.

    Download and conversion failures are reported as warnings rather than
    raised, so the YouTube transcript path can still be used when the media
    itself could not be fetched.

    Args:
        url: Address of the video entered by the user.
        whisper_model: Loaded Whisper model used for local transcription.
        llm: Key into ``LLM`` selecting the summarisation model.
    """
    is_youtube = "youtube" in url or "youtu.be" in url

    try:
        video_filename = download_video(url)
        st.success("Video fetched successfully")
    except Exception:
        video_filename = None
        st.warning("Video could not be fetched")

    try:
        audio_filename = (
            convert_to_audio(video_filename) if video_filename is not None else None
        )
        if video_filename is not None:
            st.success("Audio converted successfully")
        else:
            st.info("No Video to convert into Audio")
    except Exception:
        audio_filename = None
        st.warning("Audio coud not be converted")

    if video_filename is not None:
        # Swap the real extension; replacing the literal ".mp4" left names
        # such as "clip.webm" unchanged.
        stem = os.path.splitext(os.path.basename(video_filename))[0]
        text_filename = f"{stem}.txt"
    else:
        text_filename = "transcript.txt"

    # Placeholder above the button row, used for the transcript selector and
    # for transcript-related warnings.
    emp = st.empty()

    col1, col2, col3, col4 = st.columns(4)

    if is_youtube:
        # Parse the video id only for YouTube links: constructing YouTube()
        # up front for every URL raised on non-YouTube addresses and made the
        # direct-download path unreachable.
        try:
            video_id = YouTube(url).video_id
        except Exception as e:
            emp.warning(f"Could not extract a YouTube video id from the URL. {e}")
        else:
            process_youtube_video(video_id, col3, emp, text_filename, llm)

    process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)

    with col1:
        if video_filename is not None and st.button("Download Video"):
            with st.spinner("Encoding Video"):
                get_media_download_link("video", video_filename)

    with col2:
        if audio_filename is not None and st.button("Download Audio"):
            with st.spinner("Encoding Audio"):
                get_media_download_link("audio", audio_filename)


def process_youtube_video(video_id, col, emp, text_filename, llm):
    """Offer the YouTube-provided transcripts and summarise the chosen one.

    Args:
        video_id: YouTube video identifier to look transcripts up for.
        col: Streamlit column that receives the **Fetch Transcript** button.
        emp: Streamlit placeholder used for the transcript selector and for
            warnings.
        text_filename: Download name for the transcript; the summary download
            name is derived from it.
        llm: Key into ``LLM`` selecting the summarisation model.
    """
    try:
        transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))

        if transcripts:
            transcript_options = {
                f"{transcript.language} ({transcript.language_code})": transcript
                for transcript in transcripts
            }
            transcript_option = emp.selectbox(
                "Select a transcript", list(transcript_options.keys())
            )
            selected_transcript = transcript_options[transcript_option]

            # The flag keeps the transcript on screen across reruns triggered
            # by other widgets after the button was pressed once.
            st.session_state.setdefault("api_transcript", False)
            if col.button("Fetch Transcript") or st.session_state.api_transcript:
                st.session_state.api_transcript = True
                chunks = selected_transcript.fetch()
                # Raw string: "\s" in a plain literal is an invalid escape
                # sequence and triggers a warning on current Python versions.
                transcript_text = "\n".join(
                    re.sub(r"\s+", " ", chunk["text"]) for chunk in chunks
                )

                # Give the summary its own download name so it does not
                # collide with the transcript file in the user's downloads.
                stem, extension = os.path.splitext(text_filename)
                summary_filename = f"{stem}_summary{extension}"

                c1, c2 = st.columns(2)
                with c1:
                    modified_text = st.text_area(
                        "Transcript", transcript_text, height=500
                    )
                    st.download_button("Download Transcript", modified_text, text_filename)
                with c2:
                    # Summarise the (possibly user-edited) transcript text.
                    summary = summarise(modified_text, llm)
                    summarized_text = st.text_area(
                        "Summarized Transcript", summary, height=500
                    )
                    st.download_button("Download Summary", summarized_text, summary_filename)

    except CouldNotRetrieveTranscript:
        emp.warning("Could Not Retrieve API Transcripts for this video.")
    except Exception as e:
        emp.warning(f"Error Fetching API Transcripts for this video. {e}")


def process_whisper_transcript(whisper_model, audio_filename, col, text_filename):
    """Show the Whisper transcription button and, once used, the transcript.

    Does nothing when there is no audio file. After the button has been
    pressed once, the ``whisper_transcript`` session flag keeps the transcript
    displayed on later reruns.

    Args:
        whisper_model: Model passed on to ``transcribe_whisper``.
        audio_filename: Path of the audio file, or ``None`` if unavailable.
        col: Streamlit column that receives the button.
        text_filename: Download name for the transcript.
    """
    if audio_filename is None:
        return

    st.session_state.setdefault("whisper_transcript", False)
    clicked = col.button("Transcribe (Whisper)")
    if not (clicked or st.session_state.whisper_transcript):
        return

    st.session_state.whisper_transcript = True
    transcript = transcribe_whisper(whisper_model, audio_filename)
    edited_transcript = st.text_area("Transcript", transcript, height=500)
    st.download_button("Download", edited_transcript, text_filename)


# Build the page only when this file is run as the main script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()