from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer from the local directory
model_path = "./tinyllama_model"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

# Set device to CPU
device = torch.device("cpu")
model.to(device)

# Input prompt
prompt = "Hello, how can I assist you today?"

# Tokenize the input and move tensors to the target device
inputs = tokenizer(prompt, return_tensors="pt").to(device)

# Generate text (max_length counts prompt tokens plus generated tokens)
outputs = model.generate(**inputs, max_length=100, num_return_sequences=1)

# Decode and print the output
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)