from unsloth import FastLanguageModel
import gradio as gr
import torch

# Model configuration
max_seq_length = 2048
dtype = None          # None lets Unsloth auto-detect (bfloat16 on Ampere+, float16 otherwise)
load_in_4bit = True   # 4-bit quantization to reduce GPU memory usage

# Load the model and tokenizer
model_name = "unsloth/Mistral-Nemo-Base-2407"
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
FastLanguageModel.for_inference(model)  # Optimize the model for inference

# Define the Alpaca-style prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


# Define the MCQ generation function
def generate_mcq(passage):
    instruction = (
        "Generate a multiple-choice question (MCQ) based on the passage, "
        "provide options, and indicate the correct option."
    )
    input_text = f"Passage: {passage}"
    prompt = alpaca_prompt.format(instruction, input_text, "")

    # Tokenize the prompt and generate the completion
    inputs = tokenizer(
        prompt, return_tensors="pt", truncation=True, max_length=max_seq_length
    ).to("cuda")
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=128,
        pad_token_id=tokenizer.eos_token_id,
        num_return_sequences=1,
        do_sample=True,
    )

    # Decode only the newly generated tokens so the prompt template is not echoed back
    generated_tokens = outputs[0][inputs.input_ids.shape[1]:]
    response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return response


# Define the Gradio interface
interface = gr.Interface(
    fn=generate_mcq,
    inputs=[gr.Textbox(label="Enter Passage", placeholder="Enter a passage to generate MCQs")],
    outputs="text",
    title="MCQ Generator with Mistral",
    description="Generate multiple-choice questions with a Mistral LLM. Enter a passage and get an MCQ with answer options.",
)

# Launch the app
if __name__ == "__main__":
    interface.launch()