Rehman1603 committed on
Commit
76e82bd
·
verified ·
1 Parent(s): 0b7048f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -26
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import pytube
3
  from youtube_transcript_api import YouTubeTranscriptApi as yt
4
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
5
  import os
6
  from langchain import PromptTemplate
7
  from langchain import LLMChain
@@ -20,14 +20,24 @@ def Summary_BART(text):
20
  summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
21
  return summary[0]
22
 
23
- def YtToQuizz(link, difficulty_level):
 
 
 
 
 
24
  video_id = pytube.extract.video_id(link)
25
  transcript = yt.get_transcript(video_id)
26
- data = ""
27
- for text in transcript:
28
- data += text.get('text') + " "
29
  summary = Summary_BART(data)
30
 
 
 
 
 
 
 
31
  mcq_template = """
32
  Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
33
  The difficulty level of the questions should be: {difficulty_level}
@@ -36,54 +46,77 @@ def YtToQuizz(link, difficulty_level):
36
  2. Correct answer
37
  3. Three plausible incorrect answer options
38
  4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
 
39
  """
40
  prompt = PromptTemplate(
41
- input_variables=['summary', 'difficulty_level'],
42
  template=mcq_template
43
  )
44
  llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
45
  Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
46
 
47
- response = Generated_mcqs.invoke({
48
  "summary": summary,
49
- "difficulty_level": difficulty_level
 
 
 
 
 
 
 
50
  })
51
 
52
- response_text = response['text']
 
53
 
54
  # Extract MCQs
55
  mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
56
- mcqs = re.findall(mcq_pattern, response_text, re.DOTALL)
 
 
 
 
57
 
58
- if len(mcqs) < 10:
59
- return ["Failed to generate 10 complete MCQs. Please try again."] * 3
60
 
61
- questions = []
62
- correct_answers = []
63
- options = []
 
 
 
64
 
65
- for idx, mcq in enumerate(mcqs[:10]):
66
  question, correct_answer, incorrect_answers = mcq
67
  incorrect_answers = incorrect_answers.split(', ')
68
- questions.append(f"Q{idx+1}: {question}")
69
- correct_answers.append(f"Q{idx+1}: {correct_answer}")
70
- options.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
71
 
72
- return questions, correct_answers, options
73
 
74
- def main(link, difficulty_level):
75
- return YtToQuizz(link, difficulty_level)
76
 
77
  iface = gr.Interface(
78
  fn=main,
79
  inputs=[
80
  gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
81
- gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:")
 
 
 
 
82
  ],
83
  outputs=[
84
- gr.components.Textbox(label="MCQs Statements", lines=20),
85
- gr.components.Textbox(label="Correct Answers", lines=10),
86
- gr.components.Textbox(label="Options", lines=30)
 
 
 
87
  ],
88
  title="YouTube Video Subtitle to MCQs Quiz",
89
  description="Generate MCQs from YouTube video subtitles"
 
1
  import gradio as gr
2
  import pytube
3
  from youtube_transcript_api import YouTubeTranscriptApi as yt
4
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
  import os
6
  from langchain import PromptTemplate
7
  from langchain import LLMChain
 
20
  summary = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
21
  return summary[0]
22
 
23
def translate_text(text, target_language, max_words_per_chunk=200):
    """Translate English ``text`` into ``target_language``.

    The translation model is ``Helsinki-NLP/opus-mt-en-<target_language>``,
    loaded through the ``transformers`` ``pipeline`` helper.

    The previous implementation sent the entire text to the model in one
    call with ``max_length=512``, so anything longer than a single model
    window (e.g. a full video transcript) was cut off or rejected. The text
    is now split into word-bounded chunks that are translated separately and
    joined back together with single spaces.

    Args:
        text: English source text. ``None``, empty or whitespace-only input
            yields an empty string without loading any model.
        target_language: Language code appended to the model name
            (for example ``"fr"`` or ``"de"``).
        max_words_per_chunk: Upper bound on the number of whitespace-separated
            words sent to the model per chunk. The default is a heuristic
            meant to stay below the 512-token window -- TODO confirm for
            languages/scripts where tokens-per-word is unusually high.

    Returns:
        The translated text as a single string.
    """
    # Nothing to translate: avoid downloading/loading a model for no reason.
    if not text or not text.strip():
        return ""

    translator = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{target_language}")

    # Split on whitespace so no word is cut in half at a chunk boundary.
    words = text.split()
    step = max(1, int(max_words_per_chunk))
    chunks = [" ".join(words[start:start + step]) for start in range(0, len(words), step)]

    # truncation=True is a safety net for the rare chunk that still tokenizes
    # to more than the model window; without it such input may raise.
    results = translator(chunks, max_length=512, truncation=True)
    return " ".join(item['translation_text'] for item in results)
27
+
28
+ def YtToQuizz(link, difficulty_level, language):
29
  video_id = pytube.extract.video_id(link)
30
  transcript = yt.get_transcript(video_id)
31
+ data = " ".join([text['text'] for text in transcript])
32
+
 
33
  summary = Summary_BART(data)
34
 
35
+ if language != "en":
36
+ translated_data = translate_text(data, language)
37
+ translated_summary = Summary_BART(translated_data)
38
+ else:
39
+ translated_summary = summary
40
+
41
  mcq_template = """
42
  Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
43
  The difficulty level of the questions should be: {difficulty_level}
 
46
  2. Correct answer
47
  3. Three plausible incorrect answer options
48
  4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
49
+ The language of the questions should be: {language}
50
  """
51
  prompt = PromptTemplate(
52
+ input_variables=['summary', 'difficulty_level', 'language'],
53
  template=mcq_template
54
  )
55
  llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
56
  Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
57
 
58
+ response_en = Generated_mcqs.invoke({
59
  "summary": summary,
60
+ "difficulty_level": difficulty_level,
61
+ "language": "English"
62
+ })
63
+
64
+ response_translated = Generated_mcqs.invoke({
65
+ "summary": translated_summary,
66
+ "difficulty_level": difficulty_level,
67
+ "language": language
68
  })
69
 
70
+ response_text_en = response_en['text']
71
+ response_text_translated = response_translated['text']
72
 
73
  # Extract MCQs
74
  mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
75
+ mcqs_en = re.findall(mcq_pattern, response_text_en, re.DOTALL)
76
+ mcqs_translated = re.findall(mcq_pattern, response_text_translated, re.DOTALL)
77
+
78
+ if len(mcqs_en) < 10 or len(mcqs_translated) < 10:
79
+ return ["Failed to generate 10 complete MCQs. Please try again."] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3, [""] * 3
80
 
81
+ questions_en, correct_answers_en, options_en = [], [], []
82
+ questions_translated, correct_answers_translated, options_translated = [], [], []
83
 
84
+ for idx, mcq in enumerate(mcqs_en[:10]):
85
+ question, correct_answer, incorrect_answers = mcq
86
+ incorrect_answers = incorrect_answers.split(', ')
87
+ questions_en.append(f"Q{idx+1}: {question}")
88
+ correct_answers_en.append(f"Q{idx+1}: {correct_answer}")
89
+ options_en.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
90
 
91
+ for idx, mcq in enumerate(mcqs_translated[:10]):
92
  question, correct_answer, incorrect_answers = mcq
93
  incorrect_answers = incorrect_answers.split(', ')
94
+ questions_translated.append(f"Q{idx+1}: {question}")
95
+ correct_answers_translated.append(f"Q{idx+1}: {correct_answer}")
96
+ options_translated.append(f"Q{idx+1}: A) {correct_answer}, B) {incorrect_answers[0]}, C) {incorrect_answers[1]}, D) {incorrect_answers[2]}")
97
 
98
+ return questions_en, correct_answers_en, options_en, questions_translated, correct_answers_translated, options_translated
99
 
100
def main(link, difficulty_level, language):
    """Callback wired into the Gradio interface (``fn=main``).

    Forwards the video link, difficulty level and language code to
    ``YtToQuizz`` and hands its result back unchanged.
    """
    quiz_outputs = YtToQuizz(link, difficulty_level, language)
    return quiz_outputs
102
 
103
  iface = gr.Interface(
104
  fn=main,
105
  inputs=[
106
  gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
107
+ gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
108
+ gr.components.Dropdown(
109
+ ["en", "fr", "es", "de", "it", "pt", "nl", "ru", "zh", "ja", "ko"],
110
+ label="Select language:"
111
+ )
112
  ],
113
  outputs=[
114
+ gr.components.Textbox(label="MCQs Statements (English)", lines=20),
115
+ gr.components.Textbox(label="Correct Answers (English)", lines=10),
116
+ gr.components.Textbox(label="Options (English)", lines=30),
117
+ gr.components.Textbox(label="MCQs Statements (Translated)", lines=20),
118
+ gr.components.Textbox(label="Correct Answers (Translated)", lines=10),
119
+ gr.components.Textbox(label="Options (Translated)", lines=30)
120
  ],
121
  title="YouTube Video Subtitle to MCQs Quiz",
122
  description="Generate MCQs from YouTube video subtitles"