# Page header (not part of the program): Spaces: Sleeping | File size: 4,034 Bytes | 3d571e7
import re
from docx import Document
import os
import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain import PromptTemplate
from langchain import LLMChain
from langchain_together import Together
# The Together API key must be supplied through the environment (for example a
# deployment secret), never committed to source. The key that used to be
# hard-coded at this spot should be treated as leaked and revoked.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; export it before starting the app."
    )

# Seq2seq checkpoint loaded below and used by Summary_BART.
checkpoint = "sshleifer/distilbart-cnn-12-6"
# LLM client used by DocToQuizz to generate the questions.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def Summary_BART(text):
    """Summarise *text* with the module-level seq2seq model.

    The input is tokenised with truncation at 1024 tokens, passed through
    ``model.generate`` and decoded back to a string with special tokens
    removed. Only the first decoded sequence is returned.
    """
    encoded = tokenizer(
        text,
        truncation=True,
        max_length=1024,
        return_tensors="pt",
    )
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    first_summary = decoded[0]
    return first_summary
def _extract_pdf_text(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*."""
    reader = PdfReader(pdf_path)
    # `or ""` guards against a page yielding None instead of a string
    # (NOTE(review): believed to happen for image-only pages in some PyPDF2
    # releases - confirm against the installed version).
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_mcqs(response_text):
    """Return (question, correct answer, incorrect answers) tuples found in the LLM reply."""
    # The closing `(?:\n|$)` lets the final MCQ match even when the reply does
    # not end with a newline; requiring `\n` silently dropped the last question.
    mcq_pattern = (
        r'Question:\s*(.*?)\nCorrect answer:\s*(.*?)\nIncorrect answers:\s*(.*?)(?:\n|$)'
    )
    return re.findall(mcq_pattern, response_text, re.DOTALL)


def _write_mcq_document(mcqs, output_path):
    """Write the parsed MCQs to a Word document saved at *output_path*."""
    doc = Document()
    doc.add_heading("Physics Questions", level=1)
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always written as option A, so the
        # generated quiz is trivially solvable; consider shuffling the options
        # and emitting a separate answer key.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        # Incorrect options are lettered B, C, D, ... in the order received.
        for offset, incorrect in enumerate(incorrect_answers.split(', '), start=1):
            letter = chr(ord("A") + offset)
            doc.add_paragraph(f"{letter}) {incorrect.strip()}", style="List Bullet")
    doc.save(output_path)


def DocToQuizz(file, difficulty_level):
    """Generate an MCQ quiz Word document from an uploaded PDF.

    The PDF text is extracted, summarised with ``Summary_BART``, and the
    summary is sent to the ``llama3`` chain with a prompt asking for 20
    multiple-choice questions. The reply is parsed with a regular expression
    and written to ``mcqs_Questions.docx`` in the current working directory.

    Args:
        file: The uploaded file. Either an object exposing the path as
            ``.name`` or a plain path string is accepted.
        difficulty_level: Difficulty label interpolated into the prompt.

    Returns:
        The path of the saved document, ``"mcqs_Questions.docx"``.

    Raises:
        ValueError: If no file was provided or the PDF has no extractable text.
    """
    if file is None:
        raise ValueError("No PDF file was uploaded.")
    # Accept both a tempfile-like upload object (`.name`) and a bare path
    # string; which one arrives presumably depends on the Gradio version and
    # component configuration - TODO confirm.
    pdf_path = getattr(file, "name", file)

    text = _extract_pdf_text(pdf_path)
    if not text.strip():
        # Summarising an empty string would only produce a made-up quiz.
        raise ValueError("No extractable text found in the PDF.")
    summary = Summary_BART(text)

    # Define the prompt template for generating questions
    mcq_template = """
Generate 20 different multiple-choice questions (MCQs) based on the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
For each MCQ, please provide the following:
1. Question
- Use varied question formats such as:
- "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
- Ensure questions are logically phrased and relevant to the content.
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\nCorrect answer: <correct answer>\nIncorrect answers: <option1>, <option2>, <option3>"
"""
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template
    )
    Generated_mcqs = LLMChain(llm=llama3, prompt=prompt)
    response = Generated_mcqs.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level
    })
    response_text = response['text']
    print(response_text)

    mcqs = _parse_mcqs(response_text)
    print(f"the mcqs from pattern{mcqs}")

    output_path = "mcqs_Questions.docx"
    _write_mcq_document(mcqs, output_path)
    return output_path
# Gradio Interface
def generate_quiz(file, difficulty_level):
    """Gradio callback: delegate to DocToQuizz and return the path it produces."""
    return DocToQuizz(file, difficulty_level)
# Web UI: a PDF upload plus a difficulty selector in, a downloadable file out.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF File"),  # Allow file upload
        gr.Dropdown(["Easy", "Medium", "Hard"], label="Select Difficulty Level"),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Upload a PDF file and select a difficulty level to generate quiz questions.",
)

# Launch the interface (the stray trailing `|` after this call was a syntax error).
interface.launch(debug=True)