Spaces:

Gradio-Blocks
/

Ask_Questions_To_YouTube_Videos

Build error

App Files Files Community

ysharma HF Staff committed on May 23, 2022

Commit

cc201b9

1 Parent(s): 0c2ffa7

1

Browse files

Files changed (1) hide show

app.py +39 -15

app.py CHANGED Viewed

@@ -7,6 +7,11 @@ import pandas as pd
 from sentence_transformers import SentenceTransformer, util
 import torch
 #input - video link, output - full transcript
 def get_transcript(link):
   print("******** Inside get_transcript ********")
@@ -21,8 +26,8 @@ def get_transcript(link):
 #input - question and transcript, output - answer timestamp
 def get_answers_timestamp(question, final_transcript, transcript):
   print("******** Inside get_answers_timestamp ********")
-  model_ckpt = "deepset/minilm-uncased-squad2"
-  tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
   #question = "any funny examples in video??"
   context = final_transcript
   print(f"Input Question is : {question}")
@@ -37,7 +42,7 @@ def get_answers_timestamp(question, final_transcript, transcript):
   #print(ques)
   #print(contx)
-  model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
   lst=[]
   pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
   for contexts in contx:
@@ -51,19 +56,26 @@ def get_answers_timestamp(question, final_transcript, transcript):
   idxmax2 = lst_scores.index(max(lst_scores))
   sentence_for_timestamp = lst[idxmax]['answer']
   dftranscript = pd.DataFrame(transcript)
-  modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
   embedding_1= modelST.encode(dftranscript.text, convert_to_tensor=True)
   embedding_2 = modelST.encode(sentence_for_timestamp, convert_to_tensor=True)
   similarity_tensor = util.pytorch_cos_sim(embedding_1, embedding_2)
   idx = torch.argmax(similarity_tensor)
   start_timestamp = dftranscript.iloc[[int(idx)-3]].start.values[0]
   start_timestamp = round(start_timestamp)
-  return start_timestamp
 def display_vid(url, question, sample_question=None, example_video=None):
@@ -83,11 +95,12 @@ def display_vid(url, question, sample_question=None, example_video=None):
   #get answer timestamp
   #input - question and transcript, output - answer timestamp
-  ans_timestamp = get_answers_timestamp(question, final_transcript, transcript)
   #created embedding
   html_out = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
   print(f"html output is : {html_out}")
   if question == '':
     print(f"Inside display_vid(), Sample_Question coming from Radio box is BEFORE : {sample_question}")
@@ -95,7 +108,7 @@ def display_vid(url, question, sample_question=None, example_video=None):
     print(f"Inside display_vid(), Sample Question coming from Radio box is AFTER : {sample_ques}")
   else:
     sample_ques = question
-  return html_out, sample_ques, url
 def set_example_question(sample_question):
     print(f"******* Inside Sample Questions ********")
@@ -114,17 +127,20 @@ with demo:
   with gr.Row():
     input_url = gr.Textbox(label="Input a Youtube video link") #gr.HTML(placeholder="Enter a video link here..")
     input_ques = gr.Textbox(label="Ask a Question")
     output_vid = gr.HTML(label="Video will play at the answer timestamp")
   with gr.Row():
     example_question = gr.Dropdown(
                     ["Choose a sample question", "Does video talk about different modalities",
                     "does the model uses perceiver architecture?",
-                    "how was the data collected for flamingo?",
                     "when does the video talk about locked image tuning or lit?",
-                    "comparison of clip and lit?",
-                    "when does jurassic model starts?",
-                    "when does miracle model starts?",
                     "comparison between gpt3 and jurassic?",
                     #"Can the model do classification",
                     #"Does the model pushes state of the art in image classification",
@@ -133,13 +149,21 @@ with demo:
                     "Has flamingo passed turing test yet?",
                     #"Are there cool examples from flamingo in the video?",
                     #"Does the video talk about cat?",
-                    "Any funny examples in video?"], label= "Choose a sample Question", value=None)
   with gr.Row():
     example_video = gr.CheckboxGroup( ["https://www.youtube.com/watch?v=smUHQndcmOY"], label= "Choose a sample YouTube video") #, value="Any funny examples in video?")
   #example_question.update(set_example_question) #,inputs=example_question, outputs= input_url) #example_styles.components)
   b1 = gr.Button("Publish Video")
-  b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, input_ques, input_url])
 demo.launch(enable_queue=True, debug=True)

 from sentence_transformers import SentenceTransformer, util
 import torch
+model_ckpt = "deepset/minilm-uncased-squad2"
+tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
+model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)
+modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
 #input - video link, output - full transcript
 def get_transcript(link):
   print("******** Inside get_transcript ********")
 #input - question and transcript, output - answer timestamp
 def get_answers_timestamp(question, final_transcript, transcript):
   print("******** Inside get_answers_timestamp ********")
+  #model_ckpt = "deepset/minilm-uncased-squad2"   >>>>>>>>>
+  #tokenizer = AutoTokenizer.from_pretrained(model_ckpt)   >>>>>>>>>>>>
   #question = "any funny examples in video??"
   context = final_transcript
   print(f"Input Question is : {question}")
   #print(ques)
   #print(contx)
+  #model = AutoModelForQuestionAnswering.from_pretrained(model_ckpt)  >>>>>>>>>>>>>>
   lst=[]
   pipe = pipeline("question-answering", model=model, tokenizer=tokenizer)
   for contexts in contx:
   idxmax2 = lst_scores.index(max(lst_scores))
   sentence_for_timestamp = lst[idxmax]['answer']
+  sentence_for_timestamp_secondbest = lst[idxmax2]['answer']
   dftranscript = pd.DataFrame(transcript)
+  #modelST = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')  >>>>>>>>>>>>>>>>
   embedding_1= modelST.encode(dftranscript.text, convert_to_tensor=True)
   embedding_2 = modelST.encode(sentence_for_timestamp, convert_to_tensor=True)
+  embedding_3 = modelST.encode(sentence_for_timestamp_secondbest, convert_to_tensor=True)
   similarity_tensor = util.pytorch_cos_sim(embedding_1, embedding_2)
   idx = torch.argmax(similarity_tensor)
   start_timestamp = dftranscript.iloc[[int(idx)-3]].start.values[0]
   start_timestamp = round(start_timestamp)
+  similarity_tensor_secondbest = util.pytorch_cos_sim(embedding_1, embedding_3)
+  idx_secondbest = torch.argmax(similarity_tensor_secondbest)
+  start_timestamp_secondbest = dftranscript.iloc[[int(idx_secondbest)-3]].start.values[0]
+  start_timestamp_secondbest = round(start_timestamp_secondbest)
+  return start_timestamp, start_timestamp_secondbest
 def display_vid(url, question, sample_question=None, example_video=None):
   #get answer timestamp
   #input - question and transcript, output - answer timestamp
+  ans_timestamp, ans_timestamp_secondbest = get_answers_timestamp(question, final_transcript, transcript)
   #created embedding
   html_out = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
   print(f"html output is : {html_out}")
+  html_out_secondbest = "<iframe width='560' height='315' src='https://www.youtube.com/embed/" + video_id + "?start=" + str(ans_timestamp) + "' title='YouTube video player' frameborder='0' allow='accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture' allowfullscreen></iframe>"
   if question == '':
     print(f"Inside display_vid(), Sample_Question coming from Radio box is BEFORE : {sample_question}")
     print(f"Inside display_vid(), Sample Question coming from Radio box is AFTER : {sample_ques}")
   else:
     sample_ques = question
+  return html_out, html_out_secondbest, sample_ques, url
 def set_example_question(sample_question):
     print(f"******* Inside Sample Questions ********")
   with gr.Row():
     input_url = gr.Textbox(label="Input a Youtube video link") #gr.HTML(placeholder="Enter a video link here..")
     input_ques = gr.Textbox(label="Ask a Question")
+  with gr.Row():
     output_vid = gr.HTML(label="Video will play at the answer timestamp")
+    output_vid_secondbest = gr.HTML(label="Video will play at the answer timestamp")
   with gr.Row():
     example_question = gr.Dropdown(
                     ["Choose a sample question", "Does video talk about different modalities",
                     "does the model uses perceiver architecture?",
+                    #"how was the data collected for flamingo?",
                     "when does the video talk about locked image tuning or lit?",
+                    #"comparison of clip and lit?",
+                    #"when does jurassic model starts?",
+                    #"when does miracle model starts?",
                     "comparison between gpt3 and jurassic?",
                     #"Can the model do classification",
                     #"Does the model pushes state of the art in image classification",
                     "Has flamingo passed turing test yet?",
                     #"Are there cool examples from flamingo in the video?",
                     #"Does the video talk about cat?",
+                    "Any funny examples in video?",
+                    "is there a demo of jurassic?",
+                    "is it possible to download the stylegan model?",
+                    "does the video covers graph neural network ?",
+                    "what was very cool?",
+                    "does yannic like jax?",
+                    "were there any book suggestions?",
+                    "does the video discuss multilingual language models?",
+                    "whar is the cool library?"], label= "Choose a sample Question", value=None)
   with gr.Row():
     example_video = gr.CheckboxGroup( ["https://www.youtube.com/watch?v=smUHQndcmOY"], label= "Choose a sample YouTube video") #, value="Any funny examples in video?")
   #example_question.update(set_example_question) #,inputs=example_question, outputs= input_url) #example_styles.components)
   b1 = gr.Button("Publish Video")
+  b1.click(display_vid, inputs=[input_url, input_ques, example_question, example_video], outputs=[output_vid, output_vid_secondbest, input_ques, input_url])
 demo.launch(enable_queue=True, debug=True)