Spaces:
Sleeping
Sleeping
import gradio as gr | |
import pytube | |
from youtube_transcript_api import YouTubeTranscriptApi as yt | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import os | |
from langchain import PromptTemplate | |
from langchain import LLMChain | |
from langchain_together import Together | |
import re | |
# Supply the Together API key only when the environment does not already
# provide one, so a key configured through deployment secrets is never
# overwritten by this module.
# SECURITY NOTE(review): the fallback value below is a credential committed to
# source. It should be rotated and this fallback removed once the key is
# provided through the environment.
os.environ.setdefault(
    'TOGETHER_API_KEY',
    "c2f52626b97118b71c0c36f66eda4f5957c8fc475e760c3d72f98ba07d3ed3b5",
)
# Process-wide cache of (tokenizer, model) pairs keyed by checkpoint name, so
# the checkpoint is loaded once instead of on every summarization request.
_BART_CACHE = {}


def _load_bart(checkpoint):
    """Return the ``(tokenizer, model)`` pair for *checkpoint*, loading it at most once."""
    if checkpoint not in _BART_CACHE:
        _BART_CACHE[checkpoint] = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    return _BART_CACHE[checkpoint]


def Summary_BART(text):
    """Summarize *text* with the ``sshleifer/distilbart-cnn-12-6`` checkpoint.

    Args:
        text: The text to summarize. The tokenizer truncates it to at most
            1024 tokens.

    Returns:
        The decoded summary string for the single input.
    """
    tokenizer, model = _load_bart("sshleifer/distilbart-cnn-12-6")
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # A single input was encoded, so the batch holds exactly one summary.
    return summary[0]
# Prompt sent to the LLM; ``{summary}`` and ``{difficulty_level}`` are filled
# in by the PromptTemplate.
_MCQ_TEMPLATE = """
Generate 10 different multiple-choice questions (MCQs) related to the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
Please provide the following for each question:
1. Question
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
"""


def _parse_mcqs(response_text):
    """Extract well-formed ``(question, correct_answer, [3 distractors])`` items from the LLM reply."""
    mcq_pattern = r'Question: (.*?)\nCorrect answer: (.*?)\nIncorrect answers: (.*?)(?:\n|$)'
    parsed = []
    for question, correct_answer, incorrect_answers in re.findall(
        mcq_pattern, response_text, re.DOTALL
    ):
        distractors = [
            option.strip()
            for option in incorrect_answers.split(', ')
            if option.strip()
        ]
        # Items with fewer than three distractors cannot fill options B-D;
        # skip them rather than fail with IndexError later.
        if len(distractors) < 3:
            continue
        parsed.append((question, correct_answer, distractors[:3]))
    return parsed


def YtToQuizz(link, difficulty_level):
    """Generate a 10-question multiple-choice quiz from a YouTube video's transcript.

    The transcript is fetched, summarized with ``Summary_BART``, and the
    summary is passed to a Llama 3 chat model (via Together) that is prompted
    to write the questions. The reply is then parsed into three display
    strings.

    Args:
        link: URL of the YouTube video.
        difficulty_level: Difficulty label inserted into the prompt.

    Returns:
        A 3-tuple of strings ``(questions, correct_answers, options)``, each a
        comma-separated list of ``Q<n>: ...`` entries. If fewer than 10
        complete questions could be parsed, returns
        ``("Failed to generate 10 complete MCQs. Please try again.", "", "")``.

    Raises:
        Whatever ``pytube``, ``youtube_transcript_api`` or the LLM client
        raise for an invalid link, a missing transcript or a failed request;
        those errors are not handled here.
    """
    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    # Each segment's text is followed by a single space, as before, but the
    # string is built in one pass.
    data = "".join(segment.get('text') + " " for segment in transcript)
    summary = Summary_BART(data)

    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=_MCQ_TEMPLATE,
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
    mcq_chain = LLMChain(llm=llama3, prompt=prompt)
    response = mcq_chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })

    # Count only questions that parsed completely, so the formatting loop
    # below can rely on three distractors being present.
    mcqs = _parse_mcqs(response['text'])
    if len(mcqs) < 10:
        return "Failed to generate 10 complete MCQs. Please try again.", "", ""

    questions = []
    correct_answers = []
    options = []
    # NOTE(review): the correct answer is always listed as option A; confirm
    # whether the options are meant to be shuffled before display.
    for idx, (question, correct_answer, distractors) in enumerate(mcqs[:10], start=1):
        questions.append(f"Q{idx}: {question}")
        correct_answers.append(f"Q{idx}: {correct_answer}")
        options.append(
            f"Q{idx}: A) {correct_answer}, B) {distractors[0]}, "
            f"C) {distractors[1]}, D) {distractors[2]}"
        )
    return ", ".join(questions), ", ".join(correct_answers), ", ".join(options)
def main(link, difficulty_level):
    """Forward the video link and difficulty level to ``YtToQuizz`` and return its result unchanged."""
    quiz_result = YtToQuizz(link, difficulty_level)
    return quiz_result
# Input widgets, in the positional order of ``main``'s parameters.
_quiz_inputs = [
    gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
    gr.components.Dropdown(["Easy", "Medium", "Hard"], label="Select difficulty level:"),
]

# Output widgets, one per string in the 3-tuple returned by ``main``.
_quiz_outputs = [
    gr.components.Textbox(label="MCQs Statements", lines=20),
    gr.components.Textbox(label="Correct Answers", lines=10),
    gr.components.Textbox(label="Options", lines=30),
]

# Web UI wiring the widgets above to ``main``.
iface = gr.Interface(
    fn=main,
    inputs=_quiz_inputs,
    outputs=_quiz_outputs,
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles",
)

# Start the app only when this file is run as a script.
if __name__ == '__main__':
    iface.launch()