The whole app is a single app.py running as a Hugging Face Space:
import gradio as gr
from llama_cpp import Llama

# Load the quantized MKLLM-7B-Instruct weights; n_ctx sets the context window.
llm = Llama(
    model_path="mkllm-7b-instruct-q4_0.gguf",
    n_ctx=2048,
)

def chat(message, history):
    # Rebuild the prompt from earlier turns so the model sees the whole
    # conversation (assumes ChatInterface's default (user, assistant) pairs).
    prompt = ""
    for user_msg, bot_msg in history:
        prompt += f"USER: {user_msg}\nASSISTANT: {bot_msg}\n"
    prompt += f"USER: {message}\nASSISTANT:"
    response = llm.create_completion(
        prompt,
        max_tokens=512,
        temperature=0.7,
        stop=["USER:"],  # don't let the model write the next user turn itself
    )
    return response["choices"][0]["text"].strip()

demo = gr.ChatInterface(
    chat,
    title="MKLLM Chat",
)

# 0.0.0.0:7860 is the host/port a Hugging Face Space expects.
demo.launch(server_name="0.0.0.0", server_port=7860)
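One practical gap: app.py expects mkllm-7b-instruct-q4_0.gguf to sit next to it, and a fresh Space has no such file. A minimal sketch of one way to close that gap is to fetch the weights at startup with huggingface_hub; the repo_id below is a placeholder, not the model's confirmed location:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF on first boot (cached by the hub library afterwards).
# The repo_id is a hypothetical example -- point it at wherever the
# quantized MKLLM file is actually hosted.
model_path = hf_hub_download(
    repo_id="your-org/MKLLM-7B-Instruct-GGUF",  # placeholder repo id
    filename="mkllm-7b-instruct-q4_0.gguf",
)
llm = Llama(model_path=model_path, n_ctx=2048)

With that in place, the Space's requirements.txt only needs gradio, llama-cpp-python, and huggingface_hub; the weights themselves never enter the repo.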