import gradio as gr
from llama_cpp import Llama

# Load the quantized model once at import time; the instance is reused
# for every request handled by the Gradio app.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=2048,
    n_threads=4,
    n_batch=512,
    use_mlock=True,
    use_mmap=True,
)

# System prompt shared by every turn of every conversation.
SYSTEM_PROMPT = (
    "You are a helpful, knowledgeable, and professional AI assistant. "
    "Provide detailed and thoughtful responses."
)


def chat(message, history):
    """Generate a reply to *message*, conditioning on the prior turns.

    Args:
        message: The latest user message (str).
        history: Prior conversation turns as supplied by
            ``gr.ChatInterface`` — either ``(user, assistant)`` pairs
            (tuple format) or ``{"role": ..., "content": ...}`` dicts
            ("messages" format). Both are handled.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    # BUG FIX: the original ignored `history`, so the assistant had no
    # memory of earlier turns. Replay prior turns into the prompt so the
    # model sees the whole conversation (bounded by n_ctx=2048).
    turns = []
    for item in history:
        if isinstance(item, dict):
            # Gradio "messages" format.
            role = "USER" if item.get("role") == "user" else "ASSISTANT"
            turns.append(f"{role}: {item.get('content', '')}")
        else:
            # Gradio tuple format: (user_message, assistant_message).
            user_msg, bot_msg = item
            turns.append(f"USER: {user_msg}")
            if bot_msg:
                turns.append(f"ASSISTANT: {bot_msg}")

    conversation = "\n".join(turns)
    full_prompt = (
        f"SYSTEM: {SYSTEM_PROMPT}\n"
        + (conversation + "\n" if conversation else "")
        + f"USER: {message}\nASSISTANT:"
    )
    # NOTE: the original primed the reply with "Let me provide a
    # comprehensive response." — a sentence the user never saw, so the
    # visible answer started mid-thought. The primer is removed.

    response = llm(
        full_prompt,
        max_tokens=2048,
        temperature=0.7,
        top_p=0.95,
        repeat_penalty=1.2,
        top_k=40,
        # BUG FIX: the original also stopped on "\n\n", which cut every
        # answer off at its first paragraph break. Stop only when the
        # model starts hallucinating the next user turn.
        stop=["USER:"],
    )
    return response["choices"][0]["text"].strip()


demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Professional Assistant",
    description="I provide detailed and thoughtful responses to your questions.",
    examples=[
        "Explain quantum computing",
        "What are the main principles of machine learning?",
        "How does blockchain technology work?",
    ],
)

if __name__ == "__main__":
    # Bind on all interfaces so the app is reachable from other hosts;
    # share=False keeps it off Gradio's public tunnel.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )