Spaces:
Build error
Build error
app.py
CHANGED
|
@@ -7,6 +7,11 @@ import pandas as pd
|
|
| 7 |
from sentence_transformers import SentenceTransformer, util
|
| 8 |
import torch
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
#input - video link, output - full transcript
|
| 11 |
def get_transcript(link):
|
| 12 |
print("******** Inside get_transcript ********")
|
|
@@ -21,8 +26,8 @@ def get_transcript(link):
|
|
| 21 |
#input - question and transcript, output - answer timestamp
|
| 22 |
def get_answers_timestamp(question, final_transcript, transcript):
|
| 23 |
print("******** Inside get_answers_timestamp ********")
|
| 24 |
-
model_ckpt = "deepset/minilm-uncased-squad2"
|
| 25 |
-
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
|
| 26 |
#question = "any funny examples in video??"
|
| 27 |
context = final_transcript
|
| 28 |
print(f"Input Question is : {question}")
|
|
@@ -37,7 +42,7 @@ def get_answers_timestamp(question, final_transcript, transcript):
|
|
| 37 |
#print(ques)
|
| 38 |
#print(contx)
|
| 39 |
|
| 40 |
-
model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
|
| 41 |
lst=[]
|
| 42 |
pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
|
| 43 |
for contexts in contx:
|
|
@@ -51,19 +56,26 @@ def get_answers_timestamp(question, final_transcript, transcript):
|
|
| 51 |
idxmax2 = lst_scores.index(max(lst_scores))
|
| 52 |
|
| 53 |
sentence_for_timestamp = lst[idxmax]['answer']
|
|
|
|
| 54 |
|
| 55 |
dftranscript = pd.DataFrame(transcript)
|
| 56 |
|
| 57 |
-
modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 58 |
embedding_1= modelST.encode(dftranscript.text, convert_to_tensor=True)
|
| 59 |
embedding_2 = modelST.encode(sentence_for_timestamp, convert_to_tensor=True)
|
| 60 |
-
|
|
|
|
| 61 |
similarity_tensor = util.pytorch_cos_sim(embedding_1, embedding_2)
|
| 62 |
idx = torch.argmax(similarity_tensor)
|
| 63 |
start_timestamp = dftranscript.iloc[[int(idx)-3]].start.values[0]
|
| 64 |
start_timestamp = round(start_timestamp)
|
| 65 |
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
|
| 69 |
def display_vid(url, question, sample_question=None, example_video=None):
|
|
@@ -83,11 +95,12 @@ def display_vid(url, question, sample_question=None, example_video=None):
|
|
| 83 |
|
| 84 |
#get answer timestamp
|
| 85 |
#input - question and transcript, output - answer timestamp
|
| 86 |
-
ans_timestamp = get_answers_timestamp(question, final_transcript, transcript)
|
| 87 |
|
| 88 |
#created embedding
|
| 89 |
html_out = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
|
| 90 |
print(f"html output is : {html_out}")
|
|
|
|
| 91 |
|
| 92 |
if question == '':
|
| 93 |
print(f"Inside display_vid(), Sample_Question coming from Radio box is BEFORE : {sample_question}")
|
|
@@ -95,7 +108,7 @@ def display_vid(url, question, sample_question=None, example_video=None):
|
|
| 95 |
print(f"Inside display_vid(), Sample Question coming from Radio box is AFTER : {sample_ques}")
|
| 96 |
else:
|
| 97 |
sample_ques = question
|
| 98 |
-
return html_out, sample_ques, url
|
| 99 |
|
| 100 |
def set_example_question(sample_question):
|
| 101 |
print(f"******* Inside Sample Questions ********")
|
|
@@ -114,17 +127,20 @@ with demo:
|
|
| 114 |
with gr.Row():
|
| 115 |
input_url = gr.Textbox(label="Input a Youtube video link") #gr.HTML(placeholder="Enter a video link here..")
|
| 116 |
input_ques = gr.Textbox(label="Ask a Question")
|
|
|
|
|
|
|
| 117 |
output_vid = gr.HTML(label="Video will play at the answer timestamp")
|
| 118 |
-
|
|
|
|
| 119 |
with gr.Row():
|
| 120 |
example_question = gr.Dropdown(
|
| 121 |
["Choose a sample question", "Does video talk about different modalities",
|
| 122 |
"does the model uses perceiver architecture?",
|
| 123 |
-
"how was the data collected for flamingo?",
|
| 124 |
"when does the video talk about locked image tuning or lit?",
|
| 125 |
-
"comparison of clip and lit?",
|
| 126 |
-
"when does jurassic model starts?",
|
| 127 |
-
"when does miracle model starts?",
|
| 128 |
"comparison between gpt3 and jurassic?",
|
| 129 |
#"Can the model do classification",
|
| 130 |
#"Does the model pushes state of the art in image classification",
|
|
@@ -133,13 +149,21 @@ with demo:
|
|
| 133 |
"Has flamingo passed turing test yet?",
|
| 134 |
#"Are there cool examples from flamingo in the video?",
|
| 135 |
#"Does the video talk about cat?",
|
| 136 |
-
"Any funny examples in video?"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
with gr.Row():
|
| 138 |
example_video = gr.CheckboxGroup( ["https://www.youtube.com/watch?v=smUHQndcmOY"], label= "Choose a sample YouTube video") #, value="Any funny examples in video?")
|
| 139 |
#example_question.update(set_example_question) #,inputs=example_question, outputs= input_url) #example_styles.components)
|
| 140 |
|
| 141 |
b1 = gr.Button("Publish Video")
|
| 142 |
|
| 143 |
-
b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, input_ques, input_url])
|
| 144 |
|
| 145 |
demo.launch(enable_queue=True, debug=True)
|
|
|
|
| 7 |
from sentence_transformers import SentenceTransformer, util
|
| 8 |
import torch
|
| 9 |
|
| 10 |
+
model_ckpt = "deepset/minilm-uncased-squad2"
|
| 11 |
+
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
|
| 12 |
+
model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
|
| 13 |
+
modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
| 14 |
+
|
| 15 |
#input - video link, output - full transcript
|
| 16 |
def get_transcript(link):
|
| 17 |
print("******** Inside get_transcript ********")
|
|
|
|
| 26 |
#input - question and transcript, output - answer timestamp
|
| 27 |
def get_answers_timestamp(question, final_transcript, transcript):
|
| 28 |
print("******** Inside get_answers_timestamp ********")
|
| 29 |
+
#model_ckpt = "deepset/minilm-uncased-squad2" >>>>>>>>>
|
| 30 |
+
#tokenizer = AutoTokenizer.from_pretrained(model_ckpt) >>>>>>>>>>>>
|
| 31 |
#question = "any funny examples in video??"
|
| 32 |
context = final_transcript
|
| 33 |
print(f"Input Question is : {question}")
|
|
|
|
| 42 |
#print(ques)
|
| 43 |
#print(contx)
|
| 44 |
|
| 45 |
+
#model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt) >>>>>>>>>>>>>>
|
| 46 |
lst=[]
|
| 47 |
pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
|
| 48 |
for contexts in contx:
|
|
|
|
| 56 |
idxmax2 = lst_scores.index(max(lst_scores))
|
| 57 |
|
| 58 |
sentence_for_timestamp = lst[idxmax]['answer']
|
| 59 |
+
sentence_for_timestamp_secondbest = lst[idxmax2]['answer']
|
| 60 |
|
| 61 |
dftranscript = pd.DataFrame(transcript)
|
| 62 |
|
| 63 |
+
#modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2') >>>>>>>>>>>>>>>>
|
| 64 |
embedding_1= modelST.encode(dftranscript.text, convert_to_tensor=True)
|
| 65 |
embedding_2 = modelST.encode(sentence_for_timestamp, convert_to_tensor=True)
|
| 66 |
+
embedding_3 = modelST.encode(sentence_for_timestamp_secondbest, convert_to_tensor=True)
|
| 67 |
+
|
| 68 |
similarity_tensor = util.pytorch_cos_sim(embedding_1, embedding_2)
|
| 69 |
idx = torch.argmax(similarity_tensor)
|
| 70 |
# Step back three transcript rows from the best-matching row (presumably so playback
# starts slightly before the answer -- confirm intent). Clamp at 0: a negative position
# passed to .iloc counts from the END of the frame, which would jump to the end of the video.
start_timestamp = dftranscript.iloc[[max(int(idx) - 3, 0)]].start.values[0]
|
| 71 |
start_timestamp = round(start_timestamp)
|
| 72 |
|
| 73 |
+
similarity_tensor_secondbest = util.pytorch_cos_sim(embedding_1, embedding_3)
|
| 74 |
+
idx_secondbest = torch.argmax(similarity_tensor_secondbest)
|
| 75 |
+
# Same three-row step back as for the best answer, clamped at 0 so a match in the first
# three rows does not become a negative .iloc position (which counts from the end).
start_timestamp_secondbest = dftranscript.iloc[[max(int(idx_secondbest) - 3, 0)]].start.values[0]
|
| 76 |
+
start_timestamp_secondbest = round(start_timestamp_secondbest)
|
| 77 |
+
|
| 78 |
+
return start_timestamp, start_timestamp_secondbest
|
| 79 |
|
| 80 |
|
| 81 |
def display_vid(url, question, sample_question=None, example_video=None):
|
|
|
|
| 95 |
|
| 96 |
#get answer timestamp
|
| 97 |
#input - question and transcript, output - answer timestamp
|
| 98 |
+
ans_timestamp, ans_timestamp_secondbest = get_answers_timestamp(question, final_transcript, transcript)
|
| 99 |
|
| 100 |
#created embedding
|
| 101 |
html_out = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
|
| 102 |
print(f"html output is : {html_out}")
|
| 103 |
+
# Embed for the second-best answer: it must start at the second-best timestamp,
# not the top answer's, otherwise both players open at the same position.
html_out_secondbest = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp_secondbest) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
|
| 104 |
|
| 105 |
if question == '':
|
| 106 |
print(f"Inside display_vid(), Sample_Question coming from Radio box is BEFORE : {sample_question}")
|
|
|
|
| 108 |
print(f"Inside display_vid(), Sample Question coming from Radio box is AFTER : {sample_ques}")
|
| 109 |
else:
|
| 110 |
sample_ques = question
|
| 111 |
+
return html_out, html_out_secondbest, sample_ques, url
|
| 112 |
|
| 113 |
def set_example_question(sample_question):
|
| 114 |
print(f"******* Inside Sample Questions ********")
|
|
|
|
| 127 |
with gr.Row():
|
| 128 |
input_url = gr.Textbox(label="Input a Youtube video link") #gr.HTML(placeholder="Enter a video link here..")
|
| 129 |
input_ques = gr.Textbox(label="Ask a Question")
|
| 130 |
+
|
| 131 |
+
with gr.Row():
|
| 132 |
output_vid = gr.HTML(label="Video will play at the answer timestamp")
|
| 133 |
+
output_vid_secondbest = gr.HTML(label="Video will play at the answer timestamp")
|
| 134 |
+
|
| 135 |
with gr.Row():
|
| 136 |
example_question = gr.Dropdown(
|
| 137 |
["Choose a sample question", "Does video talk about different modalities",
|
| 138 |
"does the model uses perceiver architecture?",
|
| 139 |
+
#"how was the data collected for flamingo?",
|
| 140 |
"when does the video talk about locked image tuning or lit?",
|
| 141 |
+
#"comparison of clip and lit?",
|
| 142 |
+
#"when does jurassic model starts?",
|
| 143 |
+
#"when does miracle model starts?",
|
| 144 |
"comparison between gpt3 and jurassic?",
|
| 145 |
#"Can the model do classification",
|
| 146 |
#"Does the model pushes state of the art in image classification",
|
|
|
|
| 149 |
"Has flamingo passed turing test yet?",
|
| 150 |
#"Are there cool examples from flamingo in the video?",
|
| 151 |
#"Does the video talk about cat?",
|
| 152 |
+
"Any funny examples in video?",
|
| 153 |
+
"is there a demo of jurassic?",
|
| 154 |
+
"is it possible to download the stylegan model?",
|
| 155 |
+
"does the video covers graph neural network ?",
|
| 156 |
+
"what was very cool?",
|
| 157 |
+
"does yannic like jax?",
|
| 158 |
+
"were there any book suggestions?",
|
| 159 |
+
"does the video discuss multilingual language models?",
|
| 160 |
+
"whar is the cool library?"], label= "Choose a sample Question", value=None)
|
| 161 |
with gr.Row():
|
| 162 |
example_video = gr.CheckboxGroup( ["https://www.youtube.com/watch?v=smUHQndcmOY"], label= "Choose a sample YouTube video") #, value="Any funny examples in video?")
|
| 163 |
#example_question.update(set_example_question) #,inputs=example_question, outputs= input_url) #example_styles.components)
|
| 164 |
|
| 165 |
b1 = gr.Button("Publish Video")
|
| 166 |
|
| 167 |
+
b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, output_vid_secondbest, input_ques, input_url])
|
| 168 |
|
| 169 |
demo.launch(enable_queue=True, debug=True)
|