from unsloth import FastLanguageModel
import gradio as gr
import torch

# Model configuration
max_seq_length = 2048
dtype = None          # None lets Unsloth auto-detect (bfloat16 on Ampere+, float16 otherwise)
load_in_4bit = True   # 4-bit quantization to reduce GPU memory usage

# Load the model and tokenizer
model_name = "unsloth/Mistral-Nemo-Base-2407"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
FastLanguageModel.for_inference(model)  # Optimize the model for inference

# Define the Alpaca-style prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


# Define the MCQ generation function
def generate_mcq(passage):
    instruction = (
        "Generate a multiple-choice question (MCQ) based on the passage, "
        "provide options, and indicate the correct option."
    )
    input_text = f"Passage: {passage}"
    prompt = alpaca_prompt.format(instruction, input_text, "")

    # Tokenize the prompt and generate the completion
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=max_seq_length
    ).to("cuda")
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        do_sample=True,
    )

    # Decode only the newly generated tokens so the prompt template is not echoed back
    generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_mcq,
    inputs=[gr.Textbox(label="Enter Passage", placeholder="Enter a passage to generate MCQs")],
    outputs="text",
    title="MCQ Generator with Mistral",
    description="Generate multiple-choice questions with a Mistral LLM. Enter a passage and get an MCQ with answer options.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()