# Spaces: Rehman1603 / pdf_to_mcqs (Sleeping)
# File size: 4,034 Bytes
# Revision: 3d571e7

import re
from docx import Document
import os
import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together

# Together API credentials.
# SECURITY: the key must never be written into this file, because anyone who
# can read the source can then use the account. It is read from the
# TOGETHER_API_KEY environment variable instead (for example a Space secret).
# Any key that was ever committed to this file should be treated as leaked
# and revoked.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; provide it through the environment "
        "(for example as a Space secret) before starting the app."
    )

# Checkpoint used for both the summarization tokenizer and model below.
checkpoint = "sshleifer/distilbart-cnn-12-6"
# LLM client used to write the multiple-choice questions.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

def Summary_BART(text):
    """Summarize *text* with the module-level tokenizer/model and return the summary string.

    The input is tokenized with truncation at ``max_length=1024``, so any
    tokens beyond that limit are not passed to the model.
    """
    encoded = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    # A single text was encoded, so the first decoded entry is its summary.
    return decoded[0]

def _extract_pdf_text(pdf_path):
    """Return the text of all pages of the PDF at *pdf_path*, joined by newlines."""
    reader = PdfReader(pdf_path)
    # `or ""` guards against pages for which no text is returned; the newline
    # separator keeps the last word of one page from fusing with the first
    # word of the next.
    return "\n".join(page.extract_text() or "" for page in reader.pages)


def _parse_mcqs(response_text):
    """Return (question, correct_answer, incorrect_answers) string triples found in *response_text*."""
    # The final group ends at a newline OR at the end of the text, so the last
    # MCQ is still captured when the reply does not end with a newline.
    mcq_pattern = (
        r'Question:\s*(.*?)\nCorrect answer:\s*(.*?)\nIncorrect answers:\s*(.*?)(?:\n|$)'
    )
    return re.findall(mcq_pattern, response_text, re.DOTALL)


def _write_mcq_document(mcqs, output_path):
    """Write the parsed MCQ triples to a Word document at *output_path* and return that path."""
    doc = Document()
    doc.add_heading("Physics Questions", level=1)

    # Add a section for MCQs with options
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always written as option A, so
        # its position gives the answer away; consider shuffling the options
        # and emitting a separate answer key.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        # Distractors are lettered from B onwards (chr(64 + 2) == "B").
        for i, incorrect in enumerate(incorrect_answers.split(', '), start=2):
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect.strip()}", style="List Bullet")

    doc.save(output_path)
    return output_path


def DocToQuizz(file, difficulty_level):
    """Generate a Word document of multiple-choice questions from an uploaded PDF.

    The PDF text is extracted, summarized with ``Summary_BART``, and the
    summary is sent to the ``llama3`` LLM with a prompt asking for 20 MCQs.
    The reply is parsed with a regular expression and written to a ``.docx``
    file.

    Args:
        file: Uploaded file object; its ``.name`` attribute is used as the
            path of the PDF to read.
        difficulty_level: Value substituted into the prompt as the requested
            difficulty of the questions.

    Returns:
        The path of the saved document, ``"mcqs_Questions.docx"``.

    Raises:
        ValueError: If no file was provided, or no text could be extracted
            from the PDF.
    """
    if file is None:
        raise ValueError("No PDF file was provided.")

    # Read the PDF content
    text = _extract_pdf_text(file.name)  # `file.name` is the uploaded file path
    if not text.strip():
        # Without any text there is nothing to summarize or ask questions about.
        raise ValueError("No extractable text was found in the PDF.")
    summary = Summary_BART(text)

    # Define the prompt template for generating questions
    mcq_template = """
    Generate 20 different multiple-choice questions (MCQs) based on the following summary: {summary}
    The difficulty level of the questions should be: {difficulty_level}
    For each MCQ, please provide the following:
    1. Question
       - Use varied question formats such as:
         - "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
       - Ensure questions are logically phrased and relevant to the content.
    2. Correct answer
    3. Three plausible incorrect answer options
    4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"
    """
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template
    )

    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)

    response = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level
    })

    response_text = response['text']
    print(response_text)

    mcqs = _parse_mcqs(response_text)
    print(f"the mcqs from pattern{mcqs}")

    # Build and save the document
    return _write_mcq_document(mcqs, "mcqs_Questions.docx")

# Gradio Interface
def generate_quiz(file, difficulty_level):
    """Gradio callback: delegate to ``DocToQuizz`` and return the path it produces."""
    return DocToQuizz(file, difficulty_level)

# Input widgets: the PDF to process and the requested difficulty.
pdf_upload = gr.File(file_types=[".pdf"], label="Upload PDF File")
difficulty_choice = gr.Dropdown(["Easy", "Medium", "Hard"], label="Select Difficulty Level")

# Output widget: the generated document offered as a download.
quiz_download = gr.File(label="Download Quiz Document")

# Wire the widgets to the quiz-generation callback.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[pdf_upload, difficulty_choice],
    outputs=quiz_download,
    title="Quiz Generator",
    description="Upload a PDF file and select a difficulty level to generate quiz questions.",
)

# Start the app.
interface.launch(debug=True)