"""Gradio app that turns an uploaded PDF into a Word document of MCQs.

Pipeline: extract the PDF text, summarize it with a DistilBART checkpoint,
ask a Llama 3 model (through the Together API) for multiple-choice questions
about the summary, parse the reply with a regular expression, and write the
parsed questions to a .docx file that Gradio offers for download.
"""

import os
import re

import gradio as gr
from docx import Document
from langchain import LLMChain
from langchain import PromptTemplate
from langchain_together import Together
from PyPDF2 import PdfReader
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Initialize Together API key.
# SECURITY NOTE(review): this credential is committed in source; it should be
# rotated and supplied only through the environment. A key that is already
# present in the environment now takes precedence over this fallback.
if not os.environ.get("TOGETHER_API_KEY"):
    os.environ["TOGETHER_API_KEY"] = "c2f52626b97118b71c0c36f66eda4f5957c8fc475e760c3d72f98ba07d3ed3b5"

checkpoint = "sshleifer/distilbart-cnn-12-6"
llama3 = Together(model="meta-llama/Llama-3-70b-chat-hf", max_tokens=2500)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

# Prompt sent to the LLM. The "\n" escapes are real newlines in the prompt and
# describe the reply layout that _MCQ_PATTERN parses.
_MCQ_TEMPLATE = """
Generate 20 different multiple-choice questions (MCQs) based on the following summary: {summary}
The difficulty level of the questions should be: {difficulty_level}
For each MCQ, please provide the following:
1. Question
   - Use varied question formats such as:
     - "How does...", "Why is...", "In what way...", "Which of the following...", "When does...", etc.
   - Ensure questions are logically phrased and relevant to the content.
2. Correct answer
3. Three plausible incorrect answer options
4.
Format: "Question: \nCorrect answer: \nIncorrect answers: , , "
"""

# Matches one "Question / Correct answer / Incorrect answers" triple. The
# final group ends at a newline OR at the end of the reply, so the last
# question is kept even when the reply has no trailing newline.
_MCQ_PATTERN = re.compile(
    r'Question:\s*(.*?)\nCorrect answer:\s*(.*?)\nIncorrect answers:\s*(.*?)(?:\n|$)',
    re.DOTALL,
)

# The quiz is always written to this path, relative to the working directory.
_OUTPUT_PATH = "mcqs_Questions.docx"


def Summary_BART(text):
    """Summarize *text* with the seq2seq checkpoint loaded at module import.

    The tokenizer is called with ``max_length=1024`` and ``truncation=True``,
    so at most 1024 tokens of the input reach the model.

    Args:
        text: The document text to summarize.

    Returns:
        The decoded summary string for the single input sequence.
    """
    inputs = tokenizer(text, max_length=1024, truncation=True, return_tensors="pt")
    summary_ids = model.generate(inputs["input_ids"])
    summary = tokenizer.batch_decode(
        summary_ids,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    return summary[0]


def _extract_pdf_text(path):
    """Return the concatenated extracted text of every page of the PDF at *path*."""
    reader = PdfReader(path)
    # A page without extractable text may give an empty/None result; treat it
    # as an empty string instead of failing on concatenation.
    return "".join(page.extract_text() or "" for page in reader.pages)


def _write_quiz_document(mcqs, path):
    """Write the parsed (question, correct, incorrect) triples to a .docx at *path*."""
    doc = Document()
    # NOTE(review): the title is hard-coded to "Physics" although any PDF can
    # be uploaded; confirm whether that is intended.
    doc.add_heading("Physics Questions", level=1)

    # Add a section for MCQs with options
    doc.add_heading("Multiple Choice Questions (MCQs)", level=2)
    for idx, (question, correct_answer, incorrect_answers) in enumerate(mcqs, start=1):
        doc.add_paragraph(f"Q{idx}: {question.strip()}", style="List Number")
        # NOTE(review): the correct answer is always written as option A, so
        # the document reads as an answer sheet rather than a blind quiz.
        doc.add_paragraph(f"A) {correct_answer.strip()}", style="List Bullet")

        # Split the distractors and drop blanks (e.g. from a trailing ", ")
        # so no empty option is written.
        distractors = [option.strip() for option in incorrect_answers.split(', ')]
        distractors = [option for option in distractors if option]
        for i, incorrect in enumerate(distractors, start=2):
            # chr(64 + 2) == "B": distractors are lettered B, C, D, ...
            doc.add_paragraph(f"{chr(64 + i)}) {incorrect}", style="List Bullet")

    doc.save(path)


def DocToQuizz(file, difficulty_level):
    """Generate a Word quiz document from an uploaded PDF.

    Args:
        file: The uploaded PDF, either an object whose ``name`` attribute is
            the file path or a plain path string.
        difficulty_level: Difficulty label inserted into the LLM prompt.

    Returns:
        The path of the written document, ``"mcqs_Questions.docx"``.

    Raises:
        gr.Error: If no file was uploaded, the PDF has no extractable text,
            or the model reply contains no question in the expected format.
    """
    if file is None:
        raise gr.Error("Please upload a PDF file.")

    # Use `file.name` when the upload is a file-like object; fall back to the
    # value itself when it is already a path string.
    pdf_path = getattr(file, "name", file)
    text = _extract_pdf_text(pdf_path)
    if not text.strip():
        raise gr.Error("No extractable text was found in the uploaded PDF.")

    summary = Summary_BART(text)

    prompt = PromptTemplate(
        input_variables=['summary', 'difficulty_level'],
        template=_MCQ_TEMPLATE,
    )
    mcq_chain = LLMChain(llm=llama3, prompt=prompt)
    response = mcq_chain.invoke({
        "summary": summary,
        "difficulty_level": difficulty_level,
    })

    response_text = response['text']
    print(response_text)

    mcqs = _MCQ_PATTERN.findall(response_text)
    print(f"the mcqs from pattern{mcqs}")
    if not mcqs:
        # Surface the failure instead of returning a document with no questions.
        raise gr.Error(
            "The model reply did not contain any questions in the expected format. Please try again."
        )

    _write_quiz_document(mcqs, _OUTPUT_PATH)
    return _OUTPUT_PATH


# Gradio Interface
def generate_quiz(file, difficulty_level):
    """Gradio callback: build the quiz for *file* and return the document path."""
    output_file = DocToQuizz(file, difficulty_level)
    return output_file


interface = gr.Interface(
    fn=generate_quiz,
    inputs=[
        gr.File(file_types=[".pdf"], label="Upload PDF File"),  # Allow file upload
        gr.Dropdown(["Easy", "Medium", "Hard"], label="Select Difficulty Level"),
    ],
    outputs=gr.File(label="Download Quiz Document"),
    title="Quiz Generator",
    description="Upload a PDF file and select a difficulty level to generate quiz questions.",
)

# Launch the interface
interface.launch(debug=True)