Spaces:

Rehman1603
/

Paper_Gen_Physics

Sleeping

App Files Files Community

Paper_Gen_Physics / app.py

Rehman1603

Create app.py

7ff0a58 verified 5 months ago

raw

history blame contribute delete

5.04 kB

	import gradio as gr
	from PyPDF2 import PdfReader
	from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
	from langchain import PromptTemplate
	from langchain import LLMChain
	from langchain_together import Together
	import re
	from docx import Document
	import os

	# Model / client configuration.
	#
	# SECURITY: the Together API key must never be committed to source control.
	# It is read from the TOGETHER_API_KEY environment variable (on Hugging Face
	# Spaces, configure it as a Space secret). Any key that was previously
	# hard-coded in this file should be considered leaked and be revoked.
	if not os.environ.get('TOGETHER_API_KEY'):
	    raise RuntimeError(
	        "TOGETHER_API_KEY is not set. Define it as an environment variable "
	        "(or Space secret) before starting the app."
	    )

	# Hugging Face checkpoint used for local summarisation of the PDF text.
	checkpoint = "sshleifer/distilbart-cnn-12-6"

	# Remote LLM used to generate the quiz questions from the summary.
	llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)

	# Tokenizer and seq2seq model are loaded once at import time and shared by
	# every request.
	tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
	def Summary_BART(text):
	    """Summarise *text* with the module-level seq2seq model.

	    The input is tokenised with truncation at 1024 tokens, so anything
	    beyond that limit does not contribute to the summary.

	    Returns the first (and only) decoded summary string.
	    """
	    encoded = tokenizer(
	        text,
	        max_length=1024,
	        truncation=True,
	        return_tensors="pt",
	    )
	    generated_ids = model.generate(encoded["input_ids"])
	    decoded = tokenizer.batch_decode(
	        generated_ids,
	        skip_special_tokens=True,
	        clean_up_tokenization_spaces=False,
	    )
	    return decoded[0]

	# Prompt sent to the LLM; {summary} and {difficulty_level} are filled in by
	# PromptTemplate. The "Format:" lines define what the regexes below parse.
	_QUIZ_PROMPT_TEMPLATE = """
	Generate 20 different questions based on the following summary: {summary}
	The difficulty level of the questions should be: {difficulty_level}

	For the multiple-choice questions (MCQs), please provide the following for each question:
	1. Question
	- Use varied question formats such as:
	- "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
	- Ensure questions are logically phrased and relevant to the content.
	2. Correct answer
	3. Three plausible incorrect answer options
	4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"

	For the short questions, please provide:
	1. Question
	- Use varied question formats to encourage conceptual understanding and avoid repetition.
	- Ensure the short questions do not overlap in content with the MCQs.
	2. Short, concise answer
	3. Format: "SQ: <question text>\nAnswer: <answer>"

	Generate 10 MCQs and 10 unique short questions in total, ensuring diverse question structures and logical phrasing.
	"""

	# The "N. " numbering prefix is optional because the prompt does not ask for
	# it, and the final item may end at end-of-text instead of a newline.
	_MCQ_PATTERN = re.compile(
	    r'(?:\d+\.\s)?Question:\s(.*?)\nCorrect answer:\s(.*?)\nIncorrect answers:\s(.*?)(?:\n|$)',
	    re.DOTALL,
	)
	_SHORT_QUESTION_PATTERN = re.compile(r'(?:\d+\.\s)?SQ:\s(.*?)(?:\n|$)', re.DOTALL)

	# Fixed output location returned to the caller.
	_OUTPUT_DOCX = "Physics_Questions.docx"


	def _extract_pdf_text(file):
	    """Return the concatenated text of every page of the PDF *file*."""
	    reader = PdfReader(file)
	    # Guard against pages that yield no text (None or empty string).
	    return "".join(page.extract_text() or "" for page in reader.pages)


	def _parse_quiz_response(response_text):
	    """Split the LLM reply into (mcqs, short_questions).

	    *mcqs* is a list of (question, correct_answer, incorrect_answers)
	    string tuples; *short_questions* is a list of question strings.
	    """
	    mcqs = _MCQ_PATTERN.findall(response_text)
	    short_questions = _SHORT_QUESTION_PATTERN.findall(response_text)
	    return mcqs, short_questions


	def _write_quiz_docx(mcqs, short_questions, path=_OUTPUT_DOCX):
	    """Write the parsed questions to a Word document and return its path."""
	    doc = Document()
	    doc.add_heading("Physics Questions", level=1)

	    # MCQ section: question followed by lettered options.
	    # NOTE(review): the correct answer is always emitted as option "A";
	    # shuffle the options if the document is handed to students as-is.
	    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
	    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
	        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
	        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
	        # chr(64 + 2) == "B": incorrect options are lettered B, C, D, ...
	        for i, incorrect in enumerate(incorrect_answers.split(', '), start=2):
	            doc.add_paragraph(f"{chr(64 + i)}) {incorrect.strip()}", style="List Bullet")

	    # Short questions start on a fresh page.
	    doc.add_page_break()
	    doc.add_heading("Short Questions", level=2)
	    for idx, question in enumerate(short_questions, start=1):
	        doc.add_paragraph(f"{idx}. {question.strip()}", style="Body Text")

	    doc.save(path)
	    return path


	def DocToQuizz(file, difficulty_level):
	    """Generate a quiz Word document from a PDF.

	    The PDF text is extracted, summarised with Summary_BART, and the summary
	    is sent to the LLM with a prompt requesting 10 MCQs and 10 short
	    questions. The reply is parsed with regular expressions and written to
	    "Physics_Questions.docx".

	    Args:
	        file: PDF path or file object, passed straight to PdfReader.
	        difficulty_level: Text inserted into the prompt as the requested
	            difficulty (e.g. "Easy", "Medium", "Hard").

	    Returns:
	        The path of the generated .docx file.

	    Raises:
	        ValueError: If no text could be extracted from the PDF.
	    """
	    text = _extract_pdf_text(file)
	    if not text.strip():
	        # Without source text the model would invent unrelated questions.
	        raise ValueError("No extractable text found in the selected PDF.")
	    summary = Summary_BART(text)

	    prompt = PromptTemplate(
	        input_variables=['summary', 'difficulty_level'],
	        template=_QUIZ_PROMPT_TEMPLATE
	    )
	    question_chain = LLMChain(llm=llama3, prompt=prompt)
	    response = question_chain.invoke({
	        "summary": summary,
	        "difficulty_level": difficulty_level
	    })

	    mcqs, short_questions = _parse_quiz_response(response['text'])
	    return _write_quiz_docx(mcqs, short_questions)

	# Fixed list of selectable PDFs: output_range_1.pdf ... output_range_9.pdf
	# (names are generated here, not discovered from the directory).
	pdf_files = [f'output_range_{number}.pdf' for number in range(1, 10)]
	# Difficulty choices offered in the UI and forwarded into the prompt.
	difficulty_levels = list(("Easy", "Medium", "Hard"))

	# Gradio callback
	def generate_quiz(file, difficulty_level):
	    """Build the quiz document for the chosen PDF and difficulty.

	    Delegates to DocToQuizz and returns the path it produces.
	    """
	    return DocToQuizz(file, difficulty_level)

	# Gradio UI: two dropdowns in (PDF, difficulty), one downloadable file out.
	interface = gr.Interface(
	    fn=generate_quiz,
	    inputs=[
	        # Each dropdown defaults to its own first choice. The PDF default
	        # used to be set on the difficulty dropdown, where
	        # 'output_range_1.pdf' is not a valid option.
	        gr.Dropdown(pdf_files, label="Select PDF File", value=pdf_files[0]),
	        gr.Dropdown(difficulty_levels, label="Select Difficulty Level", value=difficulty_levels[0])
	    ],
	    outputs=gr.File(label="Download Quiz Document"),
	    title="Quiz Generator",
	    description="Select a PDF file and difficulty level to generate quiz questions."
	)

	# Launch the interface
	interface.launch(debug=True)