from llama_cpp import Llama


def run_local_llm():
    print("Loading AgGPT-9... (This may take a moment)")
    model_path = "./AgGPT-9.gguf"
    # Load the GGUF model with a 2048-token context window,
    # offloading 35 layers to the GPU where one is available.
    model = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=35)
    print("Model loaded. Type 'exit' to quit.")

    while True:
        prompt = input("\nEnter your prompt: ")
        if prompt.lower() == 'exit':
            break

        # Build a chat-style message list: a system prompt fixing the
        # assistant's identity, followed by the user's input.
        messages = [
            {"role": "system", "content": "You are AgGPT-9, an advanced AI assistant created by AG, the 9th series of the AgGPT models."},
            {"role": "user", "content": prompt},
        ]
        output = model.create_chat_completion(messages, max_tokens=550, temperature=0.7)

        print("\nGenerated text:")
        print(output["choices"][0]["message"]["content"])


if __name__ == "__main__":
    run_local_llm()