import gradio as gr
from llama_cpp import Llama

# Load the 4-bit quantized YugoGPT model from a local GGUF file.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=2048,       # context window in tokens
    n_threads=4,      # CPU threads; set to the number of physical cores
    n_batch=512,      # prompt tokens processed per batch
    use_mlock=True,   # lock model pages in RAM to avoid swapping
    use_mmap=True     # memory-map the model file for faster startup
)
|
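
# Optional smoke test (hypothetical prompt, left commented out): verify the
# model loads and generates before starting the server.
#   print(llm("USER: Hello!\nASSISTANT:", max_tokens=32)["choices"][0]["text"])
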
def chat(message, history):
    system_prompt = (
        "You are a helpful, knowledgeable, and professional AI assistant. "
        "Provide detailed and thoughtful responses."
    )

    # Fold earlier turns into the prompt so the model keeps conversational
    # context (assumes Gradio's tuple-style [user, assistant] history).
    past_turns = ""
    for user_msg, assistant_msg in history:
        past_turns += f"USER: {user_msg}\nASSISTANT: {assistant_msg}\n"

    # Ad hoc plain-text format; use the model's own chat template if documented.
    full_prompt = f"""SYSTEM: {system_prompt}
{past_turns}USER: {message}
ASSISTANT:"""

    response = llm(
        full_prompt,
        max_tokens=1024,  # leave room for the prompt within the 2048-token context
        temperature=0.7,
        top_p=0.95,
        repeat_penalty=1.2,
        top_k=40,
        stop=["USER:", "SYSTEM:"]  # "\n\n" removed: it truncated multi-paragraph answers
    )

    return response['choices'][0]['text'].strip()

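# Optional streaming variant: a minimal sketch, assuming llama_cpp's
# stream=True chunk format and that gr.ChatInterface accepts a generator
# yielding the accumulated reply. The name chat_stream and its simplified
# single-turn prompt are illustrative, not part of the original app.
# To try it, pass fn=chat_stream to gr.ChatInterface below.
def chat_stream(message, history):
    full_prompt = f"USER: {message}\nASSISTANT:"
    partial = ""
    for chunk in llm(full_prompt, max_tokens=1024, stream=True, stop=["USER:"]):
        partial += chunk['choices'][0]['text']
        yield partial  # Gradio re-renders the message on each yield
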
demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Professional Assistant",
    description="I provide detailed and thoughtful responses to your questions.",
    examples=[
        "Explain quantum computing",
        "What are the main principles of machine learning?",
        "How does blockchain technology work?"
    ]
)

if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        share=False             # set True for a temporary public Gradio link
    )