# Minimal Gradio chat UI for a local YugoGPT model running via llama-cpp-python.
import gradio as gr
from llama_cpp import Llama

# Load the 4-bit quantized GGUF model. n_ctx caps the context window in
# tokens; n_threads sets how many CPU threads llama.cpp uses for inference.
llm = Llama(
    model_path="yugogpt-q4_0.gguf",
    n_ctx=2048,
    n_threads=4
)
|
|
def chat(message, history):
    # Replay earlier turns so the model keeps multi-turn context. Here history
    # is assumed to be (user, assistant) pairs, ChatInterface's classic
    # "tuples" format; newer Gradio versions can pass message dicts instead.
    prompt = ""
    for user_msg, assistant_msg in history:
        prompt += f"USER: {user_msg}\nASSISTANT: {assistant_msg}\n"
    # Seed the assistant turn to steer the model toward detailed answers.
    prompt += f"""USER: {message}
ASSISTANT: Let me provide you with a comprehensive and thoughtful response.

"""

    response = llm(
        prompt,
        max_tokens=1024,     # upper bound on generated tokens
        temperature=0.8,     # sampling randomness
        stop=["USER:"],      # stop before the model invents the next user turn
        repeat_penalty=1.2,  # penalize verbatim repetition
        top_p=0.95           # nucleus sampling threshold
    )
    # llama-cpp-python returns an OpenAI-style completion dict.
    return response['choices'][0]['text']
|
|
demo = gr.ChatInterface(
    fn=chat,
    title="YugoGPT Chat",
    description="Ask me anything - I'll provide detailed and thoughtful responses."
)
|
|
if __name__ == "__main__":
    # Bind to all interfaces on Gradio's default port.
    demo.launch(server_name="0.0.0.0", server_port=7860)
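# To run (a sketch, assuming the GGUF file sits next to this script and
# "app.py" stands in for whatever this file is named):
#   pip install gradio llama-cpp-python
#   python app.py
# The UI is then reachable at http://localhost:7860; server_name="0.0.0.0"
# also exposes it to other machines on the local network.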