from llama_cpp import Llama


def run_local_llm():
    print("Loading AgGPT-9... (This may take a moment)")
    model_path = "./AgGPT-9.gguf"
    # Load the GGUF model with a 2048-token context window,
    # offloading 35 layers to the GPU where one is available.
    model = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=35)
    print("Model loaded. Type 'exit' to quit.")

    while True:
        prompt = input("\nEnter your prompt: ")
        if prompt.lower() == 'exit':
            break

        # Build a chat-style message list: a system prompt fixing the
        # assistant's identity, followed by the user's input.
        messages = [
            {"role": "system", "content": "You are AgGPT-9, an advanced AI assistant created by AG, the 9th series of the AgGPT models."},
            {"role": "user", "content": prompt},
        ]
        output = model.create_chat_completion(messages, max_tokens=550, temperature=0.7)

        print("\nGenerated text:")
        print(output["choices"][0]["message"]["content"])


if __name__ == "__main__":
    run_local_llm()