Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
import pytube
|
3 |
from youtube_transcript_api import YouTubeTranscriptApi as yt
|
4 |
-
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
import os
|
6 |
from langchain import PromptTemplate
|
7 |
from langchain import LLMChain
|
@@ -20,24 +20,14 @@ def Summary_BART(text):
|
|
20 |
summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
21 |
return summary[0]
|
22 |
|
23 |
-
def translate_text(text, target_language):
|
24 |
-
translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
|
25 |
-
translated_text = translator(text, max_length=512)
|
26 |
-
return translated_text[0]['translation_text']
|
27 |
-
|
28 |
-
def YtToQuizz(link, difficulty_level, language):
|
29 |
video_id = pytube.extract.video_id(link)
|
30 |
transcript = yt.get_transcript(video_id)
|
31 |
-
data = "
|
32 |
-
|
|
|
33 |
summary = Summary_BART(data)
|
34 |
|
35 |
-
if language != "en":
|
36 |
-
translated_data = translate_text(data, language)
|
37 |
-
translated_summary = Summary_BART(translated_data)
|
38 |
-
else:
|
39 |
-
translated_summary = summary
|
40 |
-
|
41 |
mcq_template = """
|
42 |
Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
|
43 |
The difficulty level of the questions should be: {difficulty_level}
|
@@ -46,81 +36,58 @@ def YtToQuizz(link, difficulty_level, language):
|
|
46 |
2. Correct answer
|
47 |
3. Three plausible incorrect answer options
|
48 |
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
|
49 |
-
The language of the questions should be: {language}
|
50 |
"""
|
51 |
prompt = PromptTemplate(
|
52 |
-
input_variables=['summary', 'difficulty_level', 'language'],
|
53 |
template=mcq_template
|
54 |
)
|
55 |
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
|
56 |
Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
|
57 |
|
58 |
-
|
59 |
"summary": summary,
|
60 |
-
"difficulty_level": difficulty_level
|
61 |
-
"language": "English"
|
62 |
-
})
|
63 |
-
|
64 |
-
response_translated = Generated_mcqs.invoke({
|
65 |
-
"summary": translated_summary,
|
66 |
-
"difficulty_level": difficulty_level,
|
67 |
-
"language": language
|
68 |
})
|
69 |
|
70 |
-
|
71 |
-
response_text_translated = response_translated['text']
|
72 |
|
73 |
# Extract MCQs
|
74 |
mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
|
75 |
-
|
76 |
-
mcqs_translated = re.findall(mcq_pattern, response_text_translated, re.DOTALL)
|
77 |
-
|
78 |
-
if len(mcqs_en) < 10 or len(mcqs_translated) < 10:
|
79 |
-
return ["Failed to generate 10 complete MCQs. Please try again."] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3
|
80 |
|
81 |
-
|
82 |
-
|
83 |
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
questions_en.append(f"Q{idx+1}: {question}")
|
88 |
-
correct_answers_en.append(f"Q{idx+1}: {correct_answer}")
|
89 |
-
options_en.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
|
90 |
|
91 |
-
for idx, mcq in enumerate(mcqs_translated[:10]):
|
92 |
question, correct_answer, incorrect_answers = mcq
|
93 |
incorrect_answers = incorrect_answers.split(', ')
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
|
98 |
-
return
|
99 |
|
100 |
-
def main(link, difficulty_level, language):
|
101 |
-
return YtToQuizz(link, difficulty_level, language)
|
102 |
|
103 |
iface = gr.Interface(
|
104 |
fn=main,
|
105 |
inputs=[
|
106 |
gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
|
107 |
-
gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
|
108 |
-
gr.components.Dropdown(
|
109 |
-
["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
|
110 |
-
label="Select language:"
|
111 |
-
)
|
112 |
],
|
113 |
outputs=[
|
114 |
-
gr.components.Textbox(label="MCQs Statements
|
115 |
-
gr.components.Textbox(label="Correct Answers
|
116 |
-
gr.components.Textbox(label="Options
|
117 |
-
gr.components.Textbox(label="MCQs Statements (Translated)", lines=20),
|
118 |
-
gr.components.Textbox(label="Correct Answers (Translated)", lines=10),
|
119 |
-
gr.components.Textbox(label="Options (Translated)", lines=30)
|
120 |
],
|
121 |
title="YouTube Video Subtitle to MCQs Quiz",
|
122 |
description="Generate MCQs from YouTube video subtitles"
|
123 |
)
|
124 |
|
125 |
if __name__ == '__main__':
|
126 |
-
iface.launch()
|
|
|
1 |
import gradio as gr
|
2 |
import pytube
|
3 |
from youtube_transcript_api import YouTubeTranscriptApi as yt
|
4 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
5 |
import os
|
6 |
from langchain import PromptTemplate
|
7 |
from langchain import LLMChain
|
|
|
20 |
summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
|
21 |
return summary[0]
|
22 |
|
23 |
+
def YtToQuizz(link, difficulty_level):
|
|
|
|
|
|
|
|
|
|
|
24 |
video_id = pytube.extract.video_id(link)
|
25 |
transcript = yt.get_transcript(video_id)
|
26 |
+
data = ""
|
27 |
+
for text in transcript:
|
28 |
+
data += text.get('text') + " "
|
29 |
summary = Summary_BART(data)
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
mcq_template = """
|
32 |
Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
|
33 |
The difficulty level of the questions should be: {difficulty_level}
|
|
|
36 |
2. Correct answer
|
37 |
3. Three plausible incorrect answer options
|
38 |
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
|
|
|
39 |
"""
|
40 |
prompt = PromptTemplate(
|
41 |
+
input_variables=['summary', 'difficulty_level'],
|
42 |
template=mcq_template
|
43 |
)
|
44 |
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
|
45 |
Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
|
46 |
|
47 |
+
response = Generated_mcqs.invoke({
|
48 |
"summary": summary,
|
49 |
+
"difficulty_level": difficulty_level
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
})
|
51 |
|
52 |
+
response_text = response['text']
|
|
|
53 |
|
54 |
# Extract MCQs
|
55 |
mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
|
56 |
+
mcqs = re.findall(mcq_pattern, response_text, re.DOTALL)
|
|
|
|
|
|
|
|
|
57 |
|
58 |
+
if len(mcqs) < 10:
|
59 |
+
return ["Failed to generate 10 complete MCQs. Please try again."] * 3
|
60 |
|
61 |
+
questions = []
|
62 |
+
correct_answers = []
|
63 |
+
options = []
|
|
|
|
|
|
|
64 |
|
65 |
+
for idx, mcq in enumerate(mcqs[:10]):
|
66 |
question, correct_answer, incorrect_answers = mcq
|
67 |
incorrect_answers = incorrect_answers.split(', ')
|
68 |
+
questions.append(f"Q{idx+1}: {question}")
|
69 |
+
correct_answers.append(f"Q{idx+1}: {correct_answer}")
|
70 |
+
options.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
|
71 |
|
72 |
+
return questions, correct_answers, options
|
73 |
|
74 |
+
def main(link, difficulty_level):
|
75 |
+
return YtToQuizz(link, difficulty_level)
|
76 |
|
77 |
iface = gr.Interface(
|
78 |
fn=main,
|
79 |
inputs=[
|
80 |
gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
|
81 |
+
gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:")
|
|
|
|
|
|
|
|
|
82 |
],
|
83 |
outputs=[
|
84 |
+
gr.components.Textbox(label="MCQs Statements", lines=20),
|
85 |
+
gr.components.Textbox(label="Correct Answers", lines=10),
|
86 |
+
gr.components.Textbox(label="Options", lines=30)
|
|
|
|
|
|
|
87 |
],
|
88 |
title="YouTube Video Subtitle to MCQs Quiz",
|
89 |
description="Generate MCQs from YouTube video subtitles"
|
90 |
)
|
91 |
|
92 |
if __name__ == '__main__':
|
93 |
+
iface.launch()
|