Spaces:
Sleeping
Sleeping
import streamlit as st | |
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM | |
import os | |
from huggingface_hub import login | |
# Authenticate with the Hugging Face Hub using the token stored in the
# TUTOR_LLAMA environment variable.
hf_token = os.getenv("TUTOR_LLAMA")
if hf_token:
    # Only log in when a token is actually configured: login(token=None)
    # falls back to an interactive prompt, which cannot be answered from a
    # Streamlit app. Without a token, model loading below relies on whatever
    # credentials are already available on the machine.
    login(token=hf_token)

# Load LLaMA model and tokenizer for Arabic and ESL tutoring
model_name = "meta-llama/Llama-3.2-1B"  # Adjust to the LLaMA model you're using


@st.cache_resource
def _load_text_generation_pipeline(name):
    """Build the text-generation pipeline for the checkpoint *name*.

    Streamlit re-executes the whole script on every widget interaction, so
    the loading is wrapped in ``st.cache_resource``: the tokenizer and model
    are loaded once per distinct *name* and the same pipeline object is
    reused on later reruns.

    Args:
        name: Hub identifier (or local path) of the causal-LM checkpoint.

    Returns:
        A ``transformers`` text-generation pipeline configured for CPU.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForCausalLM.from_pretrained(name)
    return pipeline(
        "text-generation",
        model=loaded_model,
        tokenizer=loaded_tokenizer,
        device=-1,  # Ensure it runs on CPU (adjust if using GPU)
    )


# Set up the Hugging Face pipeline for text-generation task with LLaMA model
model_pipeline = _load_text_generation_pipeline(model_name)
# Keep the module-level names the original script exposed.
tokenizer = model_pipeline.tokenizer
model = model_pipeline.model
# Page header shown above the question box.
st.title("AI Arabic and ESL Tutor")
st.write("Ask me a question in English or Arabic, and I will help you.")

# Sidebar controls: these values are read by the generation call further down.
st.sidebar.title("Model Parameters")
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.5,
    value=1.0,  # default
    step=0.1,
)
top_p = st.sidebar.slider(
    "Top-p (Nucleus Sampling)",
    min_value=0.0,
    max_value=1.0,
    value=0.9,  # default
    step=0.05,
)
top_k = st.sidebar.slider(
    "Top-k",
    min_value=0,
    max_value=100,
    value=50,  # default
    step=1,
)
do_sample = st.sidebar.checkbox("Enable Random Sampling", value=True)

# Free-text question typed by the student.
student_question = st.text_input("Ask your question in English or Arabic!")
# Function to generate response with post-processing
def generate_response(prompt, max_length=75):
    """Generate the model's continuation of *prompt* and return it as text.

    The sampling settings (``do_sample``, ``temperature``, ``top_p``,
    ``top_k``) are read from the module-level sidebar values, and the call
    goes through the module-level ``model_pipeline``.

    Args:
        prompt: The full prompt text sent to the model.
        max_length: Upper bound on the number of tokens generated for the
            answer. It is passed as ``max_new_tokens`` so the prompt's own
            tokens do not count against the budget; with a plain
            ``max_length`` a long question would leave no room for an answer.

    Returns:
        The generated answer with the prompt removed and surrounding
        whitespace stripped.
    """
    generation_kwargs = {
        "max_new_tokens": max_length,
        "do_sample": do_sample,
        # Ask the pipeline to return only the continuation instead of
        # removing the prompt afterwards with str.replace, which would also
        # delete any later repetition of the prompt inside the answer.
        "return_full_text": False,
    }
    if do_sample:
        # These settings only apply when sampling; leave them out for
        # deterministic decoding.
        generation_kwargs.update(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
        )

    outputs = model_pipeline(prompt, **generation_kwargs)
    return outputs[0]["generated_text"].strip()
# Generate and display response using the LLaMA model.
# Strip the input first so a question made only of spaces is treated as
# "no question" instead of triggering a generation on an empty prompt.
question = student_question.strip()
if question:
    # Format the prompt to guide the model to respond conversationally and concisely
    prompt = f"Q: {question}\nA: Explain it simply to a young student in no more than 3 sentences."
    # Call the function to generate and clean the response
    answer = generate_response(prompt)
    st.write("Tutor's Answer:", answer)