Spaces:
Runtime error
Runtime error
File size: 7,193 Bytes
2274cd3 5ac9a87 5a8ba9d 2274cd3 6685eea 2274cd3 5a8ba9d 6d1c02f 5a8ba9d 2274cd3 a8d6167 2274cd3 6d1c02f a8d6167 6d1c02f 2274cd3 c8fcb49 2274cd3 6d1c02f 2274cd3 6d1c02f 2274cd3 6d1c02f 2274cd3 6d1c02f 2274cd3 6d1c02f 5a8ba9d 6d1c02f 5a8ba9d 6d1c02f 2274cd3 5a8ba9d 2274cd3 6d1c02f a8d6167 2274cd3 6d1c02f 2274cd3 a8d6167 2274cd3 a8d6167 2274cd3 5a8ba9d 6d1c02f 5a8ba9d 2274cd3 a8d6167 5a8ba9d 2274cd3 5a8ba9d 01d71d0 2274cd3 a8d6167 5a8ba9d a8d6167 01d71d0 6d1c02f 5a8ba9d 6d1c02f 5a8ba9d 0c8203a 5a8ba9d 2274cd3 6d1c02f 7693546 2274cd3 5a8ba9d 6d1c02f 2274cd3 6d1c02f e702c5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
import os
import openai
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from urllib.parse import urlparse, parse_qs
from requests.structures import CaseInsensitiveDict
# The OpenAI API key comes from the OPENAI_KEY environment variable; it is
# None when the variable is not set.
openai.api_key = os.environ.get("OPENAI_KEY")
def Prompt_T(context, lang):
    """Build the summarisation prompt for one transcript chunk.

    Args:
        context: Transcript text to summarise (a ``str``).
        lang: Language code of the transcript. ``"ru"`` and ``"uk"`` select
            the Russian and Ukrainian prompts; any other value selects the
            English prompt.

    Returns:
        The prompt: an instruction line, the context framed by two
        ``=========`` separator lines, and a trailing answer label.
    """
    # (instruction, answer label) per language code.
    localized = {
        "ru": (
            "Я хочу, чтобы вы выступили в роли автора контента, который взят с транскрипт youtube видео, его нужно преобразовать в читаемый вид. Резюмируйте следующий текст в 40 слов:",
            "Ответ:",
        ),
        "uk": (
            "Я хочу, щоб ви виступили в ролі автора контенту, який узятий з транскрипту youtube відео, його треба перетворити у читабельний вигляд. Резюмуйте наступний текст у 50 слів:",
            # Ukrainian label, so the prompt stays in a single language.
            "Відповідь:",
        ),
    }
    english = (
        "I want you to act as a content writer who is working with youtube video transcript. Summarise the following text in 50 words:",
        "Answer:",
    )
    instruction, answer_label = localized.get(lang, english)
    separator = "========="
    return "\n".join([instruction, separator, context, separator, answer_label])
def convert_seconds(seconds):
    """Format a duration given in seconds as ``M:SS`` or ``H:MM:SS``.

    Args:
        seconds: Duration in seconds; it is rounded to the nearest whole
            second before formatting.

    Returns:
        ``"H:MM:SS"`` when the duration is at least one hour, otherwise
        ``"M:SS"``. Minutes (in the hour form) and seconds are zero-padded
        to two digits, so 65 seconds is ``"1:05"`` rather than ``"1:5"``.
    """
    total_seconds = round(seconds)
    minutes, secs = divmod(total_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    if hours > 0:
        return "{}:{:02d}:{:02d}".format(hours, minutes, secs)
    return "{}:{:02d}".format(minutes, secs)
def get_transcript(video_id, lang_video, chunk_size):
    """Fetch a video's transcript and group it into timestamped text chunks.

    Args:
        video_id: YouTube video id passed to ``YouTubeTranscriptApi``.
        lang_video: Language code of the transcript to request.
        chunk_size: Target maximum number of characters per chunk.

    Returns:
        A list of ``{'text': str, 'start': number}`` dicts in transcript
        order. The first chunk starts at ``0.0``; each later chunk starts at
        the ``'start'`` of its first transcript line. A single transcript
        line longer than ``chunk_size`` becomes a chunk of its own. An empty
        transcript yields one chunk with empty text.

    Raises:
        Whatever ``YouTubeTranscriptApi.get_transcript`` raises when the
        transcript cannot be retrieved.
    """
    entries = YouTubeTranscriptApi.get_transcript(video_id, languages=[lang_video])
    chunks = []
    chunk_text = ""
    chunk_start = 0.0
    for entry in entries:
        candidate = chunk_text + " " + entry['text']
        if chunk_text and len(candidate) > chunk_size:
            # The current chunk is full: emit it and open the next chunk
            # with the line that did not fit, so no transcript text is lost.
            chunks.append({'text': chunk_text, 'start': chunk_start})
            chunk_text = " " + entry['text']
            chunk_start = entry['start']
        else:
            chunk_text = candidate
    chunks.append({'text': chunk_text, 'start': chunk_start})
    return chunks
def split_string(string, chunk_size):
    """Cut *string* into consecutive slices of at most *chunk_size* items.

    The last slice holds whatever remains and may be shorter than the
    others; an empty input produces an empty list.
    """
    pieces = []
    for offset in range(0, len(string), chunk_size):
        pieces.append(string[offset:offset + chunk_size])
    return pieces
def gpt_api (input_text):
    """Send *input_text* to the chat model and return the reply text.

    The text is sent as a single ``system``-role message to the
    ``gpt-3.5-turbo`` model through ``openai.ChatCompletion.create``; the
    content of the first returned choice is the result.
    """
    chat_messages = [ {"role": "system", "content": input_text} ]
    reply = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=chat_messages)
    first_choice = reply.choices[0]
    return first_choice.message.content
def generate_video_html(video_url, request: gr.Request):
    """Return the HTML ``<iframe>`` that embeds the video at *video_url*.

    Args:
        video_url: A ``youtube.com/watch?v=...`` URL. An empty string falls
            back to a built-in default video.
        request: The Gradio request object; not used by this function.

    Returns:
        The iframe markup (named ``video_yt`` so links can target it), or
        the string ``"Неверный URL"`` when the URL is not a watch URL or
        does not carry a usable video id.
    """
    if video_url == "":
        video_url = "https://youtube.com/watch?v=PQBYZDyDBrY"
    # Does video_url look like a regular watch link?
    if "youtube.com/watch?v=" not in video_url:
        return "Неверный URL"
    # Read the id from the "v" query parameter instead of slicing the tail of
    # the URL, so extra parameters such as "&t=10s" do not corrupt it.
    try:
        video_id = parse_qs(urlparse(video_url.strip()).query).get("v", [""])[0]
    except ValueError:
        return "Неверный URL"
    # Only ASCII letters, digits, "-" and "_" are accepted, which keeps
    # arbitrary user text out of the generated HTML.
    if not video_id or not all(ch.isascii() and (ch.isalnum() or ch in "-_") for ch in video_id):
        return "Неверный URL"
    html_embed = '<iframe width="450" height="250" src="https://www.youtube.com/embed/' + video_id + '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" allowfullscreen name="video_yt" ></iframe>'
    return html_embed
def generate(video_url, request: gr.Request):
    """Stream an HTML summary of the YouTube video at *video_url*.

    This is a generator: every value it yields is a complete HTML string
    meant to replace the previous one in the output component. Status
    messages are yielded as well, because a value passed to ``return``
    inside a generator is never delivered to the consumer.

    Args:
        video_url: A ``youtube.com/watch?v=...`` URL. An empty string yields
            an empty result.
        request: The Gradio request object; not used by this function.

    Yields:
        ``""`` for empty input; ``"Неверный URL"`` for an unusable URL; a
        "No access for transcript" message when the transcript cannot be
        listed or fetched; otherwise a waiting image, then the growing list
        of timestamped summaries, and finally the complete list.
    """
    import html  # local import: only needed to escape the model output

    if video_url == "":
        yield ""
        return
    # Does video_url look like a regular watch link?
    if "youtube.com/watch?v=" not in video_url:
        yield "Неверный URL"
        return
    # Read the id from the "v" query parameter instead of slicing the tail of
    # the URL, so extra parameters such as "&t=10s" do not corrupt it.
    try:
        video_id = parse_qs(urlparse(video_url.strip()).query).get("v", [""])[0]
    except ValueError:
        video_id = ""
    # Only ASCII letters, digits, "-" and "_" are accepted, which keeps
    # arbitrary user text out of the generated HTML.
    if not video_id or not all(ch.isascii() and (ch.isalnum() or ch in "-_") for ch in video_id):
        yield "Неверный URL"
        return
    print("video_id=", video_id)

    # Broad catch on purpose: any failure to reach the transcript is shown
    # to the user as a message instead of breaking the stream.
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
    except Exception:
        yield "No access for transcript "
        return

    # Use the language of the auto-generated transcript (the last one if
    # several are listed); "en" is the default when none is generated.
    lang_video = "en"
    for transcript in transcript_list:
        if transcript.is_generated:
            lang_video = transcript.language_code
        print ("transcript.language_code=", transcript.language_code)

    # Russian and Ukrainian transcripts are cut into smaller chunks.
    chunk_size = 2000 if lang_video in ("ru", "uk") else 4000
    try:
        result_list = get_transcript(video_id, lang_video, chunk_size)
    except Exception:
        yield "No access for transcript"
        return

    print ("===============================================")
    print("Transcript lenght:", sum(len(item['text']) for item in result_list))
    print ("===============================================")

    img_wait = '<img src="https://huggingface.co/spaces/LaoCzi/YouTube_Summarize2/resolve/main/22.gif">'
    yield "<br>" + img_wait

    final_answer_gpt = ""
    for item in result_list:
        time_text = convert_seconds(item['start'])
        time_seconds = str(round(item['start']))
        input_gpt = Prompt_T(item['text'], lang_video)
        # The link targets the iframe named "video_yt" so a click jumps the
        # embedded player to this chunk's start time.
        time_url = '<a href="//www.youtube.com/embed/' + video_id + '?rel=0&autoplay=1&start=' + time_seconds + '" target="video_yt">' + time_text + '</a>'
        # The model output is derived from the transcript, so it is escaped
        # before being placed into the page.
        summary = html.escape(gpt_api(input_gpt))
        final_answer_gpt = final_answer_gpt + "<p>" + time_url + " " + summary + "</p>"
        yield "<h6>" + "<br>" + final_answer_gpt + img_wait
    yield "<h6>" + "<br>" + final_answer_gpt + "</h6>"
# ---------------------------------------------------------------------------
# Gradio UI: a URL box and a button; one HTML output holds the embedded
# player and the other holds the streamed summary.
# ---------------------------------------------------------------------------
title = "YouTube Summorize (en,ua,ru)"
css="""
footer {visibility: hidden}
.gradio-container {padding-top: 100px}
"""
with gr.Blocks(css=css, title=title) as demo:
    gr.HTML("<h3>A simple way to summarize YouTube video</h3>")
    # NOTE(review): the nesting of the components below was reconstructed
    # from a copy of this file that had lost its indentation - confirm that
    # the two HTML outputs sit where intended.
    with gr.Row():
        with gr.Column():
            input_d = gr.Textbox(label="YouTube video URL", placeholder="https://www.youtube.com/watch?v=XXXXXXXX", value="")
            greet_btn = gr.Button("Summarise")
            # gr.HTML is the component form already used for the heading
            # above; it replaces the legacy gr.outputs.HTML namespace.
            dt_2 = gr.HTML()
        dt_1 = gr.HTML()
    dt = [dt_1, dt_2]
    # Two handlers per trigger: one renders the player, one streams the summary.
    greet_btn.click(generate_video_html, inputs=input_d, outputs=dt_2)
    greet_btn.click(generate, inputs=input_d, outputs=dt_1)
    # The same handlers also run once when the page loads.
    demo.load(generate_video_html, inputs=input_d, outputs=dt_2)
    demo.load(generate, inputs=input_d, outputs=dt_1)
# queue() is called before launch() so the generator handler can stream.
demo.queue()
demo.launch(debug=True, share=False)
|