"""Streamlit front end for a small "AI English Tutor" demo.

The page takes a student's question, wraps it in a tutoring prompt, runs it
through a Hugging Face causal language model, and shows the generated answer.
Sidebar widgets control the decoding parameters.
"""
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "EleutherAI/gpt-neox-20b"
# Upper bound on prompt tokens fed to the model (longer prompts are truncated).
MAX_PROMPT_TOKENS = 256
# Upper bound on tokens generated for one answer. Without an explicit limit,
# generate() falls back to a very short default total length that the prompt
# alone already exceeds.
MAX_NEW_TOKENS = 200


@st.cache_resource
def load_model():
    """Load the tokenizer and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction;
    caching keeps the model from being reloaded each time.

    Returns:
        A ``(tokenizer, model)`` tuple for ``MODEL_NAME``.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
    return loaded_tokenizer, loaded_model


def generate_answer(tokenizer, model, question, *, do_sample, temperature, top_p, top_k):
    """Generate the tutor's answer to ``question``.

    Args:
        tokenizer: Tokenizer matching ``model``.
        model: Causal language model exposing ``generate()``.
        question: The student's question (already stripped of whitespace).
        do_sample: When true, sample using ``temperature``/``top_p``/``top_k``;
            otherwise decode greedily and ignore those three values.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.
        top_k: Top-k cutoff.

    Returns:
        The generated text only, without the instruction prompt.
    """
    input_text = f"You are a tutor for a young student. Please explain the answer to this question step by step in simple terms: '{question}'"
    inputs = tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
        max_length=MAX_PROMPT_TOKENS,
    )

    generation_kwargs = {
        "max_new_tokens": MAX_NEW_TOKENS,
        "do_sample": do_sample,
        # Prevent repeating any sequence of 3 tokens.
        "no_repeat_ngram_size": 3,
        # Set the padding token explicitly so generate() does not have to
        # pick a fallback on every call.
        "pad_token_id": tokenizer.eos_token_id,
    }
    if do_sample:
        # These knobs only apply when sampling; passing them for greedy
        # decoding is at best ignored with a warning.
        generation_kwargs.update(temperature=temperature, top_p=top_p, top_k=top_k)

    # Pass the full encoding so the attention mask accompanies the input ids.
    generated_ids = model.generate(**inputs, **generation_kwargs)

    # The output sequence begins with the prompt tokens; decode only what
    # follows so the instruction text is not echoed back to the student.
    prompt_length = inputs["input_ids"].shape[1]
    new_token_ids = generated_ids[0][prompt_length:]
    return tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()


tokenizer, model = load_model()

# Streamlit app UI
st.title("AI English Tutor")
st.write("Ask me a question, and I will help you!")

# Sidebar for user to control model generation parameters
st.sidebar.title("Model Parameters")
temperature = st.sidebar.slider("Temperature", 0.1, 1.5, 1.0, 0.1)  # Default 1.0
top_p = st.sidebar.slider("Top-p (Nucleus Sampling)", 0.0, 1.0, 0.9, 0.05)  # Default 0.9
top_k = st.sidebar.slider("Top-k", 0, 100, 50, 1)  # Default 50

# Unchecked (the default) means deterministic greedy decoding.
do_sample = st.sidebar.checkbox("Enable Random Sampling", value=False)

# Input field for the student
student_question = st.text_input("Ask your question!")

# Generate and display a response only for non-blank input.
question = student_question.strip() if student_question else ""
if question:
    response = generate_answer(
        tokenizer,
        model,
        question,
        do_sample=do_sample,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )
    st.write("Tutor's Answer:", response)