# Quiz Generator -- generates multiple-choice quizzes (.docx) from uploaded PDFs.
import os
import re

import gradio as gr
from docx import Document
from langchain import LLMChain, PromptTemplate
from langchain_together import Together
from PyPDF2 import PdfReader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# SECURITY: a live Together API key used to be hard-coded on this line and is
# therefore leaked in version control -- rotate it.  The key must now be
# supplied via the TOGETHER_API_KEY environment variable instead.
if "TOGETHER_API_KEY" not in os.environ:
    raise RuntimeError("Set the TOGETHER_API_KEY environment variable before launching.")

# Summarization model: distilled BART fine-tuned on CNN/DailyMail.
checkpoint = "sshleifer/distilbart-cnn-12-6"

# Chat LLM (Together-hosted Llama 3 70B) used to turn the summary into MCQs.
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)

tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
def Summary_BART(text):
    """Summarize *text* with the distilbart-cnn-12-6 model.

    The input is truncated to the model's 1024-token window; the single
    decoded sequence is returned as the summary string.
    """
    encoded = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    generated_ids = model.generate(encoded["input_ids"])
    decoded = tokenizer.batch_decode(
        generated_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return decoded[0]
def _extract_pdf_text(path):
    """Concatenate the extractable text of every page in the PDF at *path*."""
    reader = PdfReader(path)
    # BUG FIX: extract_text() can return None for pages with no extractable
    # text (e.g. scanned images); the old `text += page.extract_text()`
    # raised TypeError in that case.  `or ""` makes concatenation safe.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _parse_mcqs(response_text):
    """Parse Question/Correct answer/Incorrect answers triples from LLM output.

    Returns a list of (question, correct_answer, incorrect_answers_csv) tuples.
    """
    # BUG FIX: the old pattern ended with a mandatory '\n' after the incorrect
    # answers, silently dropping the final MCQ whenever the response did not
    # end with a newline.  '(?:\n|$)' also accepts end-of-string.
    mcq_pattern = r'Question:\s*(.*?)\nCorrect answer:\s*(.*?)\nIncorrect answers:\s*(.*?)(?:\n|$)'
    return re.findall(mcq_pattern, response_text, re.DOTALL)


def _write_mcq_doc(mcqs, out_path):
    """Write the parsed MCQs to a Word document saved at *out_path*."""
    doc = Document()
    # The heading used to say "Physics Questions", which was wrong for
    # arbitrary uploads -- use a subject-neutral title instead.
    doc.add_heading("Quiz Questions", level=1)
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always rendered as option A;
        # shuffle the options before handing the quiz to students unmodified.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")
        for i, incorrect in enumerate(incorrect_answers.split(', '), start=2):
            # chr(64 + i) maps 2 -> 'B', 3 -> 'C', 4 -> 'D'.
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect.strip()}", style="List Bullet")
    doc.save(out_path)


def DocToQuizz(file, difficulty_level):
    """Generate a 20-question multiple-choice quiz document from a PDF.

    Extracts the PDF text, condenses it with BART, asks the Together-hosted
    Llama 3 model for MCQs at the requested difficulty, parses the response,
    and writes the questions to a Word document.

    Args:
        file: uploaded file object exposing ``.name`` (path to the PDF).
        difficulty_level: difficulty label, e.g. "Easy", "Medium" or "Hard".

    Returns:
        Path of the generated .docx file ("mcqs_Questions.docx").
    """
    text = _extract_pdf_text(file.name)  # file.name is the upload's temp path
    summary = Summary_BART(text)

    # Prompt template for the question-generation LLM.  Kept flush-left so no
    # stray indentation is injected into the prompt text.
    mcq_template = """
Generate 20 different multiple-choice questions (MCQs) based on the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
For each MCQ, please provide the following:
1. Question
- Use varied question formats such as:
- "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
- Ensure questions are logically phrased and relevant to the content.
2. Correct answer
3. Three plausible incorrect answer options
4. Format: "Question: <question text>\\nCorrect answer: <correct answer>\\nIncorrect answers: <option1>, <option2>, <option3>"
"""
    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=mcq_template,
    )
    chain = LLMChain(llm=llama3, prompt=prompt)
    response = chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })
    response_text = response['text']
    print(response_text)

    mcqs = _parse_mcqs(response_text)
    print(f"the mcqs from pattern{mcqs}")

    out_path = "mcqs_Questions.docx"
    _write_mcq_doc(mcqs, out_path)
    return out_path
# Gradio Interface | |
def generate_quiz(file, difficulty_level):
    """Gradio callback: delegate to DocToQuizz and return the .docx path."""
    return DocToQuizz(file, difficulty_level)
# Gradio UI: a PDF upload plus a difficulty selector in, a .docx file out.
interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF File"),
        gr.Dropdown(["Easy", "Medium", "Hard"], label="Select Difficulty Level"),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Upload a PDF file and select a difficulty level to generate quiz questions.",
)

# Guard the launch so importing this module (e.g. for testing) does not
# start a web server; previously launch() ran unconditionally on import.
if __name__ == "__main__":
    # debug=True surfaces tracebacks in the browser while developing.
    interface.launch(debug=True)