Spaces:

Rehman1603
/

YouTube_to_Quiz

Sleeping

App Files Files Community

YouTube_to_Quiz / app.py

Rehman1603

Update app.py

23c6c56 verified 9 months ago

raw

history blame

2.93 kB

	import functools
	import os
	import re

	import gradio as gr
	import pytube
	from langchain import LLMChain
	from langchain import PromptTemplate
	from langchain_together import Together
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	from youtube_transcript_api import YouTubeTranscriptApi as yt

	# SECURITY: the Together API key must never be committed to source control.
	# It has to be supplied through the TOGETHER_API_KEY environment variable
	# (for example a Hugging Face Space secret). Any key that was ever committed
	# to this file should be treated as leaked and rotated.
	if not os.environ.get('TOGETHER_API_KEY'):
	    # Fail at startup with a clear message instead of on the first request.
	    raise RuntimeError(
	        "TOGETHER_API_KEY is not set; configure it as an environment "
	        "variable or Space secret before starting the app."
	    )

	@functools.lru_cache(maxsize=1)
	def _load_summarizer(checkpoint):
	    """Load the tokenizer/model pair for *checkpoint*, memoizing the result.

	    Repeated calls with the same checkpoint reuse the already-loaded pair
	    instead of calling ``from_pretrained`` again on every request.
	    """
	    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
	    return tokenizer, model


	def Summary_BART(text):
	    """Summarize *text* with the ``sshleifer/distilbart-cnn-12-6`` checkpoint.

	    Args:
	        text: The text to summarize. It is tokenized with truncation enabled
	            and ``max_length=1024``, so tokens past that limit are ignored.

	    Returns:
	        The first decoded summary string produced by the model.
	    """
	    tokenizer, model = _load_summarizer("sshleifer/distilbart-cnn-12-6")
	    encoded = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
	    summary_ids = model.generate(encoded["input_ids"])
	    decoded = tokenizer.batch_decode(
	        summary_ids,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )
	    # A single input sequence is encoded, so only the first entry is returned.
	    return decoded[0]

	def _extract_mcq_fields(response_text):
	    """Collect the labelled question, option, and answer lines from *response_text*."""
	    # `.` never matches a newline, so a greedy `(.*)` captures the rest of the
	    # line whether or not it is newline-terminated. A pattern that requires a
	    # trailing newline would silently drop a final, unterminated line.
	    questions = re.findall(r'Question: (.*)', response_text)
	    correct_answers = re.findall(r'Correct answer: (.*)', response_text)
	    option_lines = re.findall(r'Options: (.*)', response_text)
	    all_options = [line.split(', ') for line in option_lines]
	    return questions, all_options, correct_answers


	def YtToQuizz(link, difficulty_level):
	    """Generate multiple-choice questions from a YouTube video's transcript.

	    The transcript is fetched for the video id extracted from *link*,
	    summarized with ``Summary_BART``, and the summary is sent to the
	    Llama 3 70B chat model on Together through an ``LLMChain``.

	    Args:
	        link: URL of the YouTube video.
	        difficulty_level: Difficulty text substituted into the prompt.

	    Returns:
	        A ``(questions, all_options, correct_answers)`` tuple where
	        ``questions`` and ``correct_answers`` are lists of strings and
	        ``all_options`` is a list of lists, one per ``Options:`` line,
	        obtained by splitting that line on ``', '``.
	    """
	    video_id = pytube.extract.video_id(link)
	    transcript = yt.get_transcript(video_id)
	    # Join caption segments with a space so that words at segment boundaries
	    # are not fused together before summarization.
	    data = " ".join(segment.get('text', '') for segment in transcript)
	    summary = Summary_BART(data)

	    # NOTE(review): the prompt does not name the `Question:` /
	    # `Correct answer:` / `Options:` labels that the parser looks for, so
	    # parsing depends on the model choosing that layout - confirm against
	    # real responses.
	    mcq_template = """
	Give a 10 different multiple-choice question MCQ related to the summary: {summary}
	The difficulty level of the question should be: {difficulty_level}
	Please provide the following in:
	1. Question
	2. Correct answer
	3. Three plausible incorrect answer options
	4. Proper MCQ format
	"""
	    prompt = PromptTemplate(
	        input_variables=['summary', 'difficulty_level'],
	        template=mcq_template,
	    )
	    llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
	    mcq_chain = LLMChain(llm=llama3, prompt=prompt)

	    response = mcq_chain.invoke({
	        "summary": summary,
	        "difficulty_level": difficulty_level,
	    })

	    return _extract_mcq_fields(response['text'])

	def main(link, difficulty_level):
	    """Gradio callback: build a quiz for *link* and return it as a dict.

	    Delegates to ``YtToQuizz`` and packages its three results under the keys
	    ``"Questions"``, ``"Options"`` and ``"Correct Answers"``.
	    """
	    quiz_questions, answer_choices, answer_key = YtToQuizz(link, difficulty_level)

	    payload = {}
	    payload["Questions"] = quiz_questions
	    payload["Options"] = answer_choices
	    payload["Correct Answers"] = answer_key
	    return payload

	# Gradio UI definition: a text box for the video link and a difficulty
	# dropdown feed `main`, whose dict result is shown in a JSON component.
	iface = gr.Interface(
	    fn=main,
	    inputs=[
	        gr.components.Textbox(lines=2, placeholder="Enter YouTube video link"),
	        gr.components.Dropdown(
	            ["Easy", "Medium", "Hard"],
	            label="Select difficulty level:",
	        ),
	    ],
	    outputs=[gr.components.JSON(label="MCQs Output")],
	    title="YouTube Video Subtitle to MCQs Quiz",
	    description="Generate MCQs from YouTube video subtitles",
	)

	# Start the web UI only when this file is executed directly, not on import.
	if __name__ == "__main__":
	    iface.launch()