# Hugging Face Space (status when captured: Sleeping)
import gradio as gr | |
import pytube | |
from youtube_transcript_api import YouTubeTranscriptApi as yt | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import os | |
from langchain import PromptTemplate | |
from langchain import LLMChain | |
from langchain_together import Together | |
# The Together API key is a secret and must not live in source control, so it
# is taken from the process environment instead of being assigned here (on a
# Hugging Face Space, define it as a secret named TOGETHER_API_KEY).
# NOTE(review): the key that used to be hard-coded on this line has been
# exposed publicly and should be revoked/rotated with the provider.
if not os.environ.get('TOGETHER_API_KEY'):
    # Fail at start-up with an actionable message rather than on the first
    # quiz request.
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it (or add it as a Space secret) "
        "before starting the app."
    )
_BART_CHECKPOINT = "sshleifer/distilbart-cnn-12-6"

# checkpoint name -> (tokenizer, model); filled lazily on first use so the
# weights are loaded once per process instead of once per request.
_bart_cache = {}


def _load_bart(checkpoint):
    """Return the ``(tokenizer, model)`` pair for *checkpoint*, loading it once."""
    if checkpoint not in _bart_cache:
        _bart_cache[checkpoint] = (
            AutoTokenizer.from_pretrained(checkpoint),
            AutoModelForSeq2SeqLM.from_pretrained(checkpoint),
        )
    return _bart_cache[checkpoint]


def Summary_BART(text):
    """Summarize *text* with the ``sshleifer/distilbart-cnn-12-6`` checkpoint.

    The input is tokenized with ``truncation=True`` and ``max_length=1024``,
    so only the first 1024 tokens of *text* contribute to the summary.

    Args:
        text: The text to summarize.

    Returns:
        The first decoded sequence produced by ``model.generate``, with
        special tokens removed.
    """
    tokenizer, model = _load_bart(_BART_CHECKPOINT)
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return summary[0]
# Prompt sent to the LLM; ``{summary}`` and ``{difficulty_level}`` are filled
# in by PromptTemplate.
_MCQ_TEMPLATE = """
Give a 10 different multiple-choice question MCQ related to the summary: {summary}
The difficulty level of the question should be: {difficulty_level}
Please provide the following in:
1. Question
2. Correct answer
3. Three plausible incorrect answer options
4. Proper MCQ format
"""


def _transcript_text(transcript):
    """Join the caption segments of *transcript* into one string.

    Segments are separated by a single space so that the last word of one
    caption and the first word of the next are not fused together.  Entries
    whose ``'text'`` value is missing or empty are skipped.
    """
    pieces = (segment.get('text') for segment in transcript)
    return " ".join(piece for piece in pieces if piece)


def _parse_mcqs(response_text):
    """Extract ``(questions, all_options, correct_answers)`` from LLM output.

    Each field is taken from the remainder of a line following the labels
    ``Question: ``, ``Options: `` and ``Correct answer: ``.  ``.*`` stops at
    the end of the line, so a field on the final line is captured even when
    the response does not end with a newline.
    """
    import re  # local import: ``re`` is not among the file's top-level imports

    questions = re.findall(r'Question: (.*)', response_text)
    correct_answers = re.findall(r'Correct answer: (.*)', response_text)
    option_lines = re.findall(r'Options: (.*)', response_text)
    all_options = [line.split(', ') for line in option_lines]
    return questions, all_options, correct_answers


def YtToQuizz(link, difficulty_level):
    """Generate multiple-choice questions from a YouTube video's transcript.

    The transcript is fetched, summarized with ``Summary_BART`` and the
    summary is passed to the Together-hosted Llama 3 model together with the
    requested difficulty.  The model's text response is then parsed.

    Args:
        link: URL of the YouTube video.
        difficulty_level: Difficulty text inserted into the prompt.

    Returns:
        A tuple ``(questions, all_options, correct_answers)``: a list of
        question strings, a list of option lists (each ``Options:`` line split
        on ``', '``), and a list of correct-answer strings.
    """
    video_id = pytube.extract.video_id(link)
    transcript = yt.get_transcript(video_id)
    summary = Summary_BART(_transcript_text(transcript))

    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=_MCQ_TEMPLATE,
    )
    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
    mcq_chain = LLMChain(llm=llama3, prompt=prompt)
    response = mcq_chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })

    # NOTE(review): the prompt does not ask the model to use the
    # "Question:" / "Options:" / "Correct answer:" labels the parser looks
    # for, so the lists may come back empty — confirm against real output.
    return _parse_mcqs(response['text'])
def main(link, difficulty_level):
    """Gradio callback: build the quiz for *link* and label its three parts.

    Args:
        link: URL of the YouTube video.
        difficulty_level: Difficulty text forwarded to ``YtToQuizz``.

    Returns:
        A dict with the keys ``"Questions"``, ``"Options"`` and
        ``"Correct Answers"`` holding the corresponding values returned by
        ``YtToQuizz``.
    """
    quiz_questions, answer_choices, answer_key = YtToQuizz(link, difficulty_level)
    labels = ("Questions", "Options", "Correct Answers")
    return dict(zip(labels, (quiz_questions, answer_choices, answer_key)))
# Gradio front end: a link textbox and a difficulty dropdown feed ``main``,
# whose dict result is rendered in a JSON output component.
iface = gr.Interface(
    title="YouTube Video Subtitle to MCQs Quiz",
    description="Generate MCQs from YouTube video subtitles",
    fn=main,
    inputs=[
        gr.components.Textbox(placeholder="Enter YouTube video link", lines=2),
        gr.components.Dropdown(
            choices=["Easy", "Medium", "Hard"],
            label="Select difficulty level:",
        ),
    ],
    outputs=[gr.components.JSON(label="MCQs Output")],
)

# Start the web UI only when the file is executed as a script.
if __name__ == "__main__":
    iface.launch()