Spaces:

Rehman1603
/

YouTube_to_Quiz

Sleeping

App Files Community

Rehman1603 committed on Jun 24, 2024

Commit

76e82bd

verified ·

1 Parent(s): 0b7048f

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -26

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 import pytube
 from youtube_transcript_api import YouTubeTranscriptApi as yt
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
 import os
 from langchain import PromptTemplate
 from langchain import LLMChain
@@ -20,14 +20,24 @@ def Summary_BART(text):
     summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
     return summary[0]
-def YtToQuizz(link, difficulty_level):
     video_id = pytube.extract.video_id(link)
     transcript = yt.get_transcript(video_id)
-    data = ""
-    for text in transcript:
-        data += text.get('text') + " "
     summary = Summary_BART(data)
     mcq_template = """
     Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
     The difficulty level of the questions should be: {difficulty_level}
@@ -36,54 +46,77 @@ def YtToQuizz(link, difficulty_level):
     2. Correct answer
     3. Three plausible incorrect answer options
     4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
     """
     prompt = PromptTemplate(
-        input_variables=['summary', 'difficulty_level'],
         template=mcq_template
     )
     llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
     Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
-    response = Generated_mcqs.invoke({
         "summary": summary,
-        "difficulty_level": difficulty_level
     })
-    response_text = response['text']
     # Extract MCQs
     mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
-    mcqs = re.findall(mcq_pattern, response_text, re.DOTALL)
-    if len(mcqs) < 10:
-        return ["Failed to generate 10 complete MCQs. Please try again."] * 3
-    questions = []
-    correct_answers = []
-    options = []
-    for idx, mcq in enumerate(mcqs[:10]):
         question, correct_answer, incorrect_answers = mcq
         incorrect_answers = incorrect_answers.split(', ')
-        questions.append(f"Q{idx+1}: {question}")
-        correct_answers.append(f"Q{idx+1}: {correct_answer}")
-        options.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
-    return questions, correct_answers, options
-def main(link, difficulty_level):
-    return YtToQuizz(link, difficulty_level)
 iface = gr.Interface(
     fn=main,
     inputs=[
         gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
-        gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:")
     ],
     outputs=[
-        gr.components.Textbox(label="MCQs Statements", lines=20),
-        gr.components.Textbox(label="Correct Answers", lines=10),
-        gr.components.Textbox(label="Options", lines=30)
     ],
     title="YouTube Video Subtitle to MCQs Quiz",
     description="Generate MCQs from YouTube video subtitles"

 import gradio as gr
 import pytube
 from youtube_transcript_api import YouTubeTranscriptApi as yt
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import os
 from langchain import PromptTemplate
 from langchain import LLMChain
     summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
     return summary[0]
+def translate_text(text, target_language):
+    translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")
+    translated_text = translator(text, max_length=512)
+    return translated_text[0]['translation_text']
+def YtToQuizz(link, difficulty_level, language):
     video_id = pytube.extract.video_id(link)
     transcript = yt.get_transcript(video_id)
+    data = " ".join([text['text'] for text in transcript])
     summary = Summary_BART(data)
+    if language != "en":
+        translated_data = translate_text(data, language)
+        translated_summary = Summary_BART(translated_data)
+    else:
+        translated_summary = summary
     mcq_template = """
     Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
     The difficulty level of the questions should be: {difficulty_level}
     2. Correct answer
     3. Three plausible incorrect answer options
     4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
+    The language of the questions should be: {language}
     """
     prompt = PromptTemplate(
+        input_variables=['summary', 'difficulty_level', 'language'],
         template=mcq_template
     )
     llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
     Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
+    response_en = Generated_mcqs.invoke({
         "summary": summary,
+        "difficulty_level": difficulty_level,
+        "language": "English"
+    })
+    response_translated = Generated_mcqs.invoke({
+        "summary": translated_summary,
+        "difficulty_level": difficulty_level,
+        "language": language
     })
+    response_text_en = response_en['text']
+    response_text_translated = response_translated['text']
     # Extract MCQs
     mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
+    mcqs_en = re.findall(mcq_pattern, response_text_en, re.DOTALL)
+    mcqs_translated = re.findall(mcq_pattern, response_text_translated, re.DOTALL)
+    if len(mcqs_en) < 10 or len(mcqs_translated) < 10:
+        return ["Failed to generate 10 complete MCQs. Please try again."] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3
+    questions_en, correct_answers_en, options_en = [], [], []
+    questions_translated, correct_answers_translated, options_translated = [], [], []
+    for idx, mcq in enumerate(mcqs_en[:10]):
+        question, correct_answer, incorrect_answers = mcq
+        incorrect_answers = incorrect_answers.split(', ')
+        questions_en.append(f"Q{idx+1}: {question}")
+        correct_answers_en.append(f"Q{idx+1}: {correct_answer}")
+        options_en.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
+    for idx, mcq in enumerate(mcqs_translated[:10]):
         question, correct_answer, incorrect_answers = mcq
         incorrect_answers = incorrect_answers.split(', ')
+        questions_translated.append(f"Q{idx+1}: {question}")
+        correct_answers_translated.append(f"Q{idx+1}: {correct_answer}")
+        options_translated.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
+    return questions_en, correct_answers_en, options_en, questions_translated, correct_answers_translated, options_translated
+def main(link, difficulty_level, language):
+    return YtToQuizz(link, difficulty_level, language)
 iface = gr.Interface(
     fn=main,
     inputs=[
         gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
+        gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
+        gr.components.Dropdown(
+            ["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
+            label="Select language:"
+        )
     ],
     outputs=[
+        gr.components.Textbox(label="MCQs Statements (English)", lines=20),
+        gr.components.Textbox(label="Correct Answers (English)", lines=10),
+        gr.components.Textbox(label="Options (English)", lines=30),
+        gr.components.Textbox(label="MCQs Statements (Translated)", lines=20),
+        gr.components.Textbox(label="Correct Answers (Translated)", lines=10),
+        gr.components.Textbox(label="Options (Translated)", lines=30)
     ],
     title="YouTube Video Subtitle to MCQs Quiz",
     description="Generate MCQs from YouTube video subtitles"