import streamlit as st
import requests
import base64
import os
from moviepy.editor import VideoFileClip
from pytube import YouTube
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import CouldNotRetrieveTranscript
import whisper
import re
import tempfile
from huggingface_hub import InferenceClient
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
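# Instruction template sent to the LLM; the transcript is substituted into the {} placeholder via str.format().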
PROMPT = """Act as the author and provide a comprehensive detailed article in the same language as the transcript
in markdown format that has a H1 main title(example "# <this is a title> ") and broken down into H2 subtitles (example "## <this is a title> ") for the following transcript
You must follow the rules:
- Write the article in markdown format
- Create a main title for the article as markdown H1 and break the article into subtitles where each subtitle is markdown H2
- Article must be in the same language as the transcript
- summary should be informative and act as a replacement for the original transcript to the point that the user doesn't have to go back to read the transcript
- Summary should not mention the author or speaker at all should act as your independent writing without referencing the original transcript or speaker.
- You can use bullet points within the article
Transcript:
{} \n\n Article:"""
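# Registry of supported models: each entry wraps PROMPT in the model's chat template and names its Hugging Face Inference endpoint.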
LLM = {
"llama3-8b": {'prompt': f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
{PROMPT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""", 'endpoint': "meta-llama/Meta-Llama-3-8B-Instruct"}
}
@st.cache_resource()
def load_whisper(model):
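    """Load and cache a Whisper model so it is only loaded once per session."""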
return whisper.load_model(model)
@st.cache_data
def download_video(url):
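    """Download the video: via pytube for YouTube URLs, otherwise as a streamed HTTP download. Returns the local filename."""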
if "youtube" in url or "youtu.be" in url:
yt = YouTube(url)
video = yt.streams.get_highest_resolution()
filename = video.download()
else:
response = requests.get(url, stream=True)
filename = url.split("/")[-1]
with open(filename, "wb") as file:
for chunk in response.iter_content(chunk_size=1024):
if chunk:
file.write(chunk)
return filename
@st.cache_data
def convert_to_audio(video_filename):
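    """Extract the audio track from the downloaded video as an MP3 file and return its filename."""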
video = VideoFileClip(video_filename)
    # Derive the audio filename from the video filename regardless of its extension
    audio_filename = os.path.splitext(video_filename)[0] + ".mp3"
audio = video.audio
audio.write_audiofile(audio_filename, codec="mp3")
return audio_filename
@st.cache_data
def summarise(prompt, llm):
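    """Wrap the transcript in the selected model's chat template and request a summary from the Hugging Face Inference API."""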
model = InferenceClient(LLM[llm]["endpoint"])
user_message = LLM[llm]["prompt"].format(prompt)
return model.text_generation(user_message, max_new_tokens=1024)
def delete_files(video_filename, audio_filename):
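    """Remove the downloaded video and extracted audio from the server (cleanup helper; not called in the current UI flow)."""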
delete_file(video_filename)
delete_file(audio_filename)
def delete_file(filename):
if os.path.exists(filename):
os.remove(filename)
st.info(f"File '{os.path.basename(filename)}' deleted from the server.")
@st.cache_data
def transcribe_whisper(_model, audio_filepath):
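    """Transcribe the audio file with Whisper; the leading underscore on _model tells st.cache_data not to hash the model object."""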
return _model.transcribe(audio_filepath)["text"]
def get_media_download_link(media_type, file_path):
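    """Base64-encode the file and render an HTML download link for it."""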
with open(file_path, "rb") as file:
contents = file.read()
encoded = base64.b64encode(contents).decode("utf-8")
media_href = f"data:file/{media_type};base64,{encoded}"
st.markdown(
f'<a href="{media_href}" download="{os.path.basename(file_path)}">Download {os.path.basename(file_path)}</a>',
unsafe_allow_html=True,
)
@st.cache_data
def generate_summaries(_summarizer, text, min_length=50, max_length=500):
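    """Summarize the text paragraph by paragraph with a transformers-style summarization pipeline (alternative path; not wired into the UI)."""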
paragraphs = text.split("\n\n")
summaries = []
for paragraph in paragraphs:
summary = _summarizer(
paragraph, max_length=max_length, min_length=min_length, do_sample=False
)
summaries.append(summary[0]["summary_text"].strip())
return "\n\n".join(summaries)
def main():
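    """Render the page: usage instructions, URL input, model selection, and the fetch workflow."""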
st.title("VidScripter")
st.write("#### A One Stop Solution to Video Transcription")
c1, c2 = st.columns(2)
c1.write(
"""
- Enter the video URL in the text input box.
- Click the **Fetch** button to fetch the video.
- Once the video is fetched, you can perform the following actions:
- Fetch transcript from YouTube API (if available) by clicking the **Fetch Transcript** button.
- Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
- The transcript will be displayed in a text area below.
- A summary of the transcript will also be generated by the selected LLM.
- The summary will be displayed in a text area below.
- You can download the video, audio, transcript or summary by clicking the respective download buttons.
"""
)
whisper_model = load_whisper("base")
url = c2.text_input("Enter the video URL")
llm = c2.selectbox("Select LLM", list(LLM.keys()), index=0)
fetch_button = c2.button("Fetch")
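    # Persist the fetched state across Streamlit reruns so clicking other widgets does not hide the results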
st.session_state.setdefault("load_state", False)
if fetch_button or st.session_state.load_state:
st.session_state.load_state = True
if url:
process_video(url, whisper_model, llm)
def process_video(url, whisper_model, llm):
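    """Download the video, extract its audio, and render the download and transcription controls."""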
    # pytube can only parse YouTube URLs, so only extract a video id for those
    video_id = YouTube(url).video_id if ("youtube" in url or "youtu.be" in url) else None
try:
video_filename = download_video(url)
st.success("Video fetched successfully")
except Exception:
video_filename = None
st.warning("Video could not be fetched")
try:
audio_filename = (
convert_to_audio(video_filename) if video_filename is not None else None
)
if video_filename is not None:
st.success("Audio converted successfully")
else:
st.info("No Video to convert into Audio")
except Exception:
audio_filename = None
st.warning("Audio coud not be converted")
text_filename = (
os.path.basename(video_filename).replace(".mp4", ".txt")
if video_filename is not None
else "transcript.txt"
)
emp = st.empty()
col1, col2, col3, col4 = st.columns(4)
if "youtube" in url or "youtu.be" in url:
process_youtube_video(video_id, col3, emp, text_filename, llm)
process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)
with col1:
if video_filename is not None and st.button("Download Video"):
with st.spinner("Encoding Video"):
get_media_download_link("video", video_filename)
with col2:
if audio_filename is not None and st.button("Download Audio"):
with st.spinner("Encoding Audio"):
get_media_download_link("audio", audio_filename)
def process_youtube_video(video_id, col, emp, text_filename, llm):
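    """List the available YouTube caption tracks, fetch the selected one, and show it alongside an LLM summary."""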
try:
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
transcripts = [transcript for transcript in transcript_list]
if transcripts:
transcript_options = {
f"{transcript.language} ({transcript.language_code})": transcript
for transcript in transcripts
}
transcript_option = emp.selectbox(
"Select a transcript", list(transcript_options.keys())
)
selected_transcript = transcript_options[transcript_option]
st.session_state.setdefault("api_transcript", False)
if col.button("Fetch Transcript") or st.session_state.api_transcript:
st.session_state.api_transcript = True
transcript_text = selected_transcript.fetch()
transcript_text = "\n".join(
[re.sub("\s+", " ", chunk["text"]) for chunk in transcript_text]
)
c1, c2 = st.columns(2)
with c1:
modified_text = st.text_area(
"Transcript", transcript_text, height=500
)
st.download_button("Download Transcript", modified_text, text_filename)
with c2:
                    llm_summary = summarise(modified_text, llm)
                    summarized_text = st.text_area(
                        "Summarized Transcript", llm_summary, height=500
                    )
                    st.download_button(
                        "Download Summary",
                        summarized_text,
                        text_filename.replace(".txt", "_summary.txt"),
                    )
except CouldNotRetrieveTranscript:
emp.warning("Could Not Retrieve API Transcripts for this video.")
except Exception as e:
emp.warning(f"Error Fetching API Transcripts for this video. {e}")
def process_whisper_transcript(whisper_model, audio_filename, col, text_filename):
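    """Transcribe the extracted audio locally with Whisper and offer the transcript for download."""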
if audio_filename is not None:
st.session_state.setdefault("whisper_transcript", False)
if col.button("Transcribe (Whisper)") or st.session_state.whisper_transcript:
st.session_state.whisper_transcript = True
whisper_text = transcribe_whisper(whisper_model, audio_filename)
modified_text = st.text_area("Transcript", whisper_text, height=500)
st.download_button("Download", modified_text, text_filename)
if __name__ == "__main__":
    main()