File size: 5,040 Bytes
7ff0a58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together
import re
from docx import Document
import os

# Initialize Together API key
# NOTE(review): the key is committed in plain text, so anyone who can read this
# file can use it. Consider rotating it and supplying it through the deployment
# environment instead.
# The variable is set before the Together client below is constructed —
# presumably the client reads it from the environment; confirm before reordering.
os.environ['TOGETHER_API_KEY'] = "c2f52626b97118b71c0c36f66eda4f5957c8fc475e760c3d72f98ba07d3ed3b5"
# Hugging Face checkpoint id shared by the tokenizer and the seq2seq model below.
checkpoint = "sshleifer/distilbart-cnn-12-6"
# LLM handle used by DocToQuizz to write the questions; max_tokens is set to 2500.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
# Tokenizer and model are created once at import time and reused by Summary_BART.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def Summary_BART(text):
    """Summarize *text* with the module-level tokenizer and seq2seq model.

    The input is tokenized with truncation at 1024 tokens, so anything
    beyond that limit does not contribute to the summary.

    Args:
        text: The text to summarize.

    Returns:
        The first decoded sequence produced by ``model.generate``, with
        special tokens removed.
    """
    encoded = tokenizer(
        text,
        max_length=1024,
        truncation=True,
        return_tensors="pt",
    )
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]

# Patterns that pull questions out of the LLM reply.
# * The item may start with a "N." number (anywhere on the line) OR sit
#   un-numbered at the start of a line; the prompt's own format does not ask
#   for numbering, so requiring it would discard well-formed replies.
# * The final field ends at a newline OR at the end of the text, so the last
#   item is kept even when the reply has no trailing newline.
_MCQ_PATTERN = re.compile(
    r'(?:\d+\.\s*|^[ \t]*)Question:\s*(.*?)\n'
    r'Correct answer:\s*(.*?)\n'
    r'Incorrect answers:\s*(.*?)(?:\n|\Z)',
    re.DOTALL | re.MULTILINE,
)
_SHORT_QUESTION_PATTERN = re.compile(
    r'(?:\d+\.\s*|^[ \t]*)SQ:\s*(.*?)(?:\n|\Z)',
    re.DOTALL | re.MULTILINE,
)


def _extract_pdf_text(file):
    """Return the text of every page of the PDF at *file*, concatenated."""
    reader = PdfReader(file)
    # `or ""` keeps the join safe should a page yield None instead of a string
    # (e.g. a page with no extractable text in some library versions).
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_quiz_response(response_text):
    """Split the LLM reply into MCQ tuples and short-question strings.

    Returns:
        A pair ``(mcqs, short_questions)`` where ``mcqs`` is a list of
        ``(question, correct_answer, incorrect_answers)`` string tuples and
        ``short_questions`` is a list of question strings.
    """
    mcqs = _MCQ_PATTERN.findall(response_text)
    short_questions = _SHORT_QUESTION_PATTERN.findall(response_text)
    return mcqs, short_questions


def _write_quiz_document(mcqs, short_questions, output_path):
    """Write the parsed questions to a Word document at *output_path*."""
    doc = Document()
    doc.add_heading("Physics Questions", level=1)

    # MCQ section: the question followed by its lettered options.
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for number, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{number}: {question.strip()}", style="List Number")
        # NOTE: the correct answer is always written as option A; the
        # distractors follow as B, C, D in the order the LLM listed them.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        for offset, incorrect in enumerate(incorrect_answers.split(', '), start=1):
            letter = chr(ord("A") + offset)
            doc.add_paragraph(f"{letter}) {incorrect.strip()}", style="List Bullet")

    # Short questions start on their own page.
    doc.add_page_break()
    doc.add_heading("Short Questions", level=2)
    for number, question in enumerate(short_questions, start=1):
        doc.add_paragraph(f"{number}. {question.strip()}", style="Body Text")

    doc.save(output_path)


def DocToQuizz(file, difficulty_level):
    """Generate a quiz Word document from a PDF.

    The PDF text is summarized with ``Summary_BART``, the summary is sent to
    the ``llama3`` model with a prompt asking for 10 MCQs and 10 short
    questions, and the parsed questions are written to a .docx file.

    Args:
        file: Path (or file object) of the PDF, passed to ``PdfReader``.
        difficulty_level: Difficulty label inserted into the prompt.

    Returns:
        The path of the saved document, ``"Physics_Questions.docx"``. The
        same file name is reused on every call, so a later call overwrites
        the earlier output.
    """
    # Read the PDF content
    text = _extract_pdf_text(file)
    summary = Summary_BART(text)

    # Define the prompt template for generating questions
    mcq_template = """
    Generate 20 different questions based on the following summary: {summary}
    The difficulty level of the questions should be: {difficulty_level}

    For the multiple-choice questions (MCQs), please provide the following for each question:
    1. Question
       - Use varied question formats such as:
         - "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
       - Ensure questions are logically phrased and relevant to the content.
    2. Correct answer
    3. Three plausible incorrect answer options
    4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"

    For the short questions, please provide:
    1. Question
       - Use varied question formats to encourage conceptual understanding and avoid repetition.
       - Ensure the short questions do not overlap in content with the MCQs.
    2. Short, concise answer
    3. Format: "SQ: <question text>\nAnswer: <answer>"

    Generate 10 MCQs and 10 unique short questions in total, ensuring diverse question structures and logical phrasing.
    """
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template
    )

    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)

    response = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level
    })

    # Extract MCQs and Short Questions
    mcqs, short_questions = _parse_quiz_response(response['text'])

    # Build and save the document
    output_path = "Physics_Questions.docx"
    _write_quiz_document(mcqs, short_questions, output_path)
    return output_path

# PDF files offered in the UI: output_range_1.pdf through output_range_9.pdf.
# The names are fixed here; no directory is scanned.
pdf_files = [f"output_range_{index}.pdf" for index in range(1, 10)]
# Difficulty labels offered in the UI.
difficulty_levels = [
    "Easy",
    "Medium",
    "Hard",
]

# Gradio Interface
def generate_quiz(file, difficulty_level):
    """Gradio callback: build the quiz for *file* and return the document path.

    Args:
        file: The PDF selected in the UI, forwarded to ``DocToQuizz``.
        difficulty_level: The difficulty selected in the UI.

    Returns:
        Whatever ``DocToQuizz`` returns (the path of the generated document).
    """
    return DocToQuizz(file, difficulty_level)

# Build the UI: two dropdowns feed generate_quiz, whose returned path is
# offered as a downloadable file.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        # The default file name belongs on this dropdown: it is one of
        # pdf_files, not one of the difficulty levels.
        gr.Dropdown(pdf_files, label="Select PDF File", value='output_range_1.pdf'),
        # Default to a valid difficulty so the callback never receives a value
        # outside difficulty_levels when the user leaves the field untouched.
        gr.Dropdown(difficulty_levels, label="Select Difficulty Level", value=difficulty_levels[0]),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Select a PDF file and difficulty level to generate quiz questions."
)

# Launch the interface
interface.launch(debug=True)