import os

import streamlit as st
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Retrieve the Hugging Face access token from an environment variable
hf_token = os.getenv("TUTOR_LLAMA")
if hf_token:
    login(token=hf_token)

# Load the LLaMA model and tokenizer for Arabic and ESL tutoring
model_name = "meta-llama/Llama-3.2-1B"  # Adjust to the LLaMA model you're using
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Set up the Hugging Face text-generation pipeline with the LLaMA model
model_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1,  # -1 forces CPU; pass a GPU index (e.g. 0) if one is available
)

# Streamlit app UI
st.title("AI Arabic and ESL Tutor")
st.write("Ask me a question in English or Arabic, and I will help you.")

# Sidebar controls for the model's generation parameters
st.sidebar.title("Model Parameters")
temperature = st.sidebar.slider("Temperature", 0.1, 1.5, 1.0, 0.1)  # Default 1.0
top_p = st.sidebar.slider("Top-p (Nucleus Sampling)", 0.0, 1.0, 0.9, 0.05)  # Default 0.9
top_k = st.sidebar.slider("Top-k", 0, 100, 50, 1)  # Default 50
do_sample = st.sidebar.checkbox("Enable Random Sampling", value=True)

# Input field for the student
student_question = st.text_input("Ask your question in English or Arabic!")


# Generate a response and post-process it to strip the echoed prompt
def generate_response(prompt, max_new_tokens=75):
    response = model_pipeline(
        prompt,
        max_new_tokens=max_new_tokens,  # caps generated tokens only; max_length would also count the prompt
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        do_sample=do_sample,
    )
    # The pipeline echoes the prompt, so remove it to keep only the answer
    generated_text = response[0]["generated_text"]
    cleaned_text = generated_text.replace(prompt, "").strip()
    return cleaned_text


# Generate and display the response using the LLaMA model
if student_question:
    # Put the instruction before the Q/A pair so the text completed after "A:"
    # is the answer itself, not a continuation of the instruction
    prompt = (
        "Explain it simply to a young student in no more than 3 sentences.\n"
        f"Q: {student_question}\nA:"
    )
    answer = generate_response(prompt)
    st.write("Tutor's Answer:", answer)
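
# ---------------------------------------------------------------------------
# Usage note (a minimal sketch; the filename app.py is an assumption, not
# something stated above):
#
#   export TUTOR_LLAMA=hf_xxx...   # your Hugging Face access token
#   streamlit run app.py
#
# meta-llama/Llama-3.2-1B is a gated model on the Hugging Face Hub, so the
# token must belong to an account that has accepted the model's license.
# Note that transformers emits warnings if temperature/top_p/top_k are passed
# while "Enable Random Sampling" is unchecked (do_sample=False), since those
# parameters only apply when sampling is enabled.
# ---------------------------------------------------------------------------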