# Source: Hugging Face Space by Rehman1603
# Commit message: "Create app.py"
# Commit: 7ff0a58 (verified)
import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together
import re
from docx import Document
import os
# Initialize Together API key.
# SECURITY: a credential committed to source control should be treated as
# compromised. Rotate this key and supply the replacement through the
# TOGETHER_API_KEY environment variable (for example a deployment secret),
# then delete the literal below. ``setdefault`` lets an externally supplied
# key take precedence and only falls back to the legacy literal when the
# variable is not already set, so existing deployments keep working.
os.environ.setdefault(
    'TOGETHER_API_KEY',
    "c2f52626b97118b71c0c36f66eda4f5957c8fc475e760c3d72f98ba07d3ed3b5",
)

# Hugging Face checkpoint loaded below as the seq2seq tokenizer/model pair.
checkpoint = "sshleifer/distilbart-cnn-12-6"

# Hosted Llama 3 chat model; each completion is capped at 2500 tokens.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)

# Loaded once at import time so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def Summary_BART(text):
    """Summarize *text* with the module-level seq2seq ``model``.

    The input is tokenized with truncation at 1024 tokens, so anything past
    that limit does not contribute to the summary.

    Args:
        text: Plain text to summarize.

    Returns:
        The decoded summary string for the single input sequence.
    """
    encoded = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # Only one sequence was submitted, so the batch holds a single summary.
    return decoded[0]
def DocToQuizz(file, difficulty_level):
    """Build a quiz Word document from the contents of a PDF.

    The PDF text is extracted page by page, condensed with ``Summary_BART``,
    and sent to the ``llama3`` LLM with a prompt requesting 10 multiple-choice
    questions and 10 short questions. The reply is parsed with regular
    expressions and written to ``Physics_Questions.docx`` in the current
    working directory (overwriting any previous file of that name).

    Args:
        file: Path or file object accepted by ``PdfReader``.
        difficulty_level: Difficulty label interpolated into the prompt.

    Returns:
        The path of the saved document, ``"Physics_Questions.docx"``.
    """
    # Read the PDF content. A page without extractable text may yield an
    # empty (or, in some library versions, missing) result; ``or ""`` keeps
    # the concatenation safe, and ``join`` avoids repeated string copies.
    reader = PdfReader(file)
    text = "".join(page.extract_text() or "" for page in reader.pages)
    summary = Summary_BART(text)

    # Define the prompt template for generating questions
    mcq_template = """
Generate 20 different questions based on the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
For the multiple-choice questions (MCQs), please provide the following for each question:
1. Question
- Use varied question formats such as:
- "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
- Ensure questions are logically phrased and relevant to the content.
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"
For the short questions, please provide:
1. Question
- Use varied question formats to encourage conceptual understanding and avoid repetition.
- Ensure the short questions do not overlap in content with the MCQs.
2. Short, concise answer
3. Format: "SQ: <question text>\nAnswer: <answer>"
Generate 10 MCQs and 10 unique short questions in total, ensuring diverse question structures and logical phrasing.
"""
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template
    )
    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
    response = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level
    })
    response_text = response['text']

    # Extract MCQs and Short Questions.
    # * The "N." prefix is optional: the format requested in the prompt does
    #   not include numbering, so the model may or may not add it.
    # * The final field ends at a newline OR at the end of the reply, so the
    #   last item is not lost when the reply has no trailing newline.
    mcq_pattern = (
        r'(?:\d+\.\s*)?Question:\s*(.*?)\nCorrect answer:\s*(.*?)'
        r'\nIncorrect answers:\s*(.*?)(?:\n|$)'
    )
    short_question_pattern = r'(?:\d+\.\s*)?SQ:\s*(.*?)(?:\n|$)'
    mcqs = re.findall(mcq_pattern, response_text, re.DOTALL)
    short_questions = re.findall(short_question_pattern, response_text, re.DOTALL)

    # Initialize a Word document
    doc = Document()
    doc.add_heading("Physics Questions", level=1)

    # Add a section for MCQs with options
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always written as option A and
        # no answer key is produced; confirm whether that is intended.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        # Distractors are labelled from "B" onwards: chr(64 + 2) == "B".
        for i, incorrect in enumerate(incorrect_answers.split(', '), start=2):
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect.strip()}", style="List Bullet")

    # Add a page break and section for Short Questions (answers are not
    # captured by the pattern, so only the question text is written).
    doc.add_page_break()
    doc.add_heading("Short Questions", level=2)
    for idx, question in enumerate(short_questions, start=1):
        doc.add_paragraph(f"{idx}. {question.strip()}", style="Body Text")

    # Save the document
    doc.save("Physics_Questions.docx")
    return "Physics_Questions.docx"
# Fixed list of the PDF file names offered in the UI: output_range_1.pdf
# through output_range_9.pdf (hard-coded, not discovered from disk).
pdf_files = [f"output_range_{number}.pdf" for number in range(1, 10)]

# Difficulty labels offered in the UI and interpolated into the LLM prompt.
difficulty_levels = [
    "Easy",
    "Medium",
    "Hard",
]
# Gradio Interface
def generate_quiz(file, difficulty_level):
    """Gradio callback: build the quiz document and return its path.

    Args:
        file: PDF selected in the UI, forwarded unchanged to ``DocToQuizz``.
        difficulty_level: Difficulty label selected in the UI.

    Returns:
        Whatever ``DocToQuizz`` returns (the path of the generated document).
    """
    return DocToQuizz(file, difficulty_level)
# Two dropdowns in, one downloadable file out.
# The default 'output_range_1.pdf' belongs to the PDF dropdown; it was
# previously attached to the difficulty dropdown, where it is not one of the
# choices and would have been sent to the prompt as the difficulty level.
# The difficulty dropdown now defaults to the first configured level.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        gr.Dropdown(pdf_files, label="Select PDF File", value='output_range_1.pdf'),
        gr.Dropdown(
            difficulty_levels,
            label="Select Difficulty Level",
            value=difficulty_levels[0],
        ),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Select a PDF file and difficulty level to generate quiz questions."
)
# Launch the interface
interface.launch(debug=True)