# Quiz Generator -- generates multiple-choice quizzes (.docx) from uploaded PDFs.
import os
import re

import gradio as gr
from docx import Document
from langchain import LLMChain, PromptTemplate
from langchain_together import Together
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# SECURITY: a live Together API key used to be hard-coded on this line and is
# therefore leaked in version control -- rotate it.  The key must now be
# supplied via the TOGETHER_API_KEY environment variable instead.
if "TOGETHER_API_KEY" not in os.environ:
    raise RuntimeError("Set the TOGETHER_API_KEY environment variable before launching.")

# Summarization model: distilled BART fine-tuned on CNN/DailyMail.
checkpoint = "sshleifer/distilbart-cnn-12-6"

# Chat LLM (Together-hosted Llama 3 70B) used to turn the summary into MCQs.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def Summary_BART(text):
    """Summarize *text* with the distilbart-cnn-12-6 model.

    The input is truncated to the model's 1024-token window; the single
    decoded sequence is returned as the summary string.
    """
    encoded = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]
def _extract_pdf_text(path):
    """Concatenate the extractable text of every page in the PDF at *path*."""
    reader = PdfReader(path)
    # BUG FIX: extract_text() can return None for pages with no extractable
    # text (e.g. scanned images); the old `text += page.extract_text()`
    # raised TypeError in that case.  `or ""` makes concatenation safe.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_mcqs(response_text):
    """Parse Question/Correct answer/Incorrect answers triples from LLM output.

    Returns a list of (question, correct_answer, incorrect_answers_csv) tuples.
    """
    # BUG FIX: the old pattern ended with a mandatory '\n' after the incorrect
    # answers, silently dropping the final MCQ whenever the response did not
    # end with a newline.  '(?:\n|$)' also accepts end-of-string.
    mcq_pattern = r'Question:\s*(.*?)\nCorrect answer:\s*(.*?)\nIncorrect answers:\s*(.*?)(?:\n|$)'
    return re.findall(mcq_pattern, response_text, re.DOTALL)


def _write_mcq_doc(mcqs, out_path):
    """Write the parsed MCQs to a Word document saved at *out_path*."""
    doc = Document()
    # The heading used to say "Physics Questions", which was wrong for
    # arbitrary uploads -- use a subject-neutral title instead.
    doc.add_heading("Quiz Questions", level=1)
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always rendered as option A;
        # shuffle the options before handing the quiz to students unmodified.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        for i, incorrect in enumerate(incorrect_answers.split(', '), start=2):
            # chr(64 + i) maps 2 -> 'B', 3 -> 'C', 4 -> 'D'.
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect.strip()}", style="List Bullet")
    doc.save(out_path)


def DocToQuizz(file, difficulty_level):
    """Generate a 20-question multiple-choice quiz document from a PDF.

    Extracts the PDF text, condenses it with BART, asks the Together-hosted
    Llama 3 model for MCQs at the requested difficulty, parses the response,
    and writes the questions to a Word document.

    Args:
        file: uploaded file object exposing ``.name`` (path to the PDF).
        difficulty_level: difficulty label, e.g. "Easy", "Medium" or "Hard".

    Returns:
        Path of the generated .docx file ("mcqs_Questions.docx").
    """
    text = _extract_pdf_text(file.name)  # file.name is the upload's temp path
    summary = Summary_BART(text)

    # Prompt template for the question-generation LLM.  Kept flush-left so no
    # stray indentation is injected into the prompt text.
    mcq_template = """
Generate 20 different multiple-choice questions (MCQs) based on the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
For each MCQ, please provide the following:
1. Question
- Use varied question formats such as:
- "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
- Ensure questions are logically phrased and relevant to the content.
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
"""
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template,
    )
    chain = LLMChain(llm=llama3, prompt=prompt)
    response = chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })
    response_text = response['text']
    print(response_text)

    mcqs = _parse_mcqs(response_text)
    print(f"the mcqs from pattern{mcqs}")

    out_path = "mcqs_Questions.docx"
    _write_mcq_doc(mcqs, out_path)
    return out_path
# Gradio Interface | |
def generate_quiz(file, difficulty_level):
    """Gradio callback: delegate to DocToQuizz and return the .docx path."""
    return DocToQuizz(file, difficulty_level)
# Gradio UI: a PDF upload plus a difficulty selector in, a .docx file out.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF File"),
        gr.Dropdown(["Easy", "Medium", "Hard"], label="Select Difficulty Level"),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Upload a PDF file and select a difficulty level to generate quiz questions.",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start a web server; previously launch() ran unconditionally on import.
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the browser while developing.
    interface.launch(debug=True)