File size: 405 Bytes
d818664
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
import gradio as gr
from llama_cpp import Llama

# Load the model once at import time; this is a module-level side effect
# and may take several seconds depending on disk/model size.
# The .gguf file is expected in the current working directory.
llm = Llama(
    model_path="MKLLM-7B-Instruct-Q4_0.gguf",
    n_ctx=2048  # context window in tokens; long chats beyond this are truncated by llama.cpp
)

def chat(message, history):
    """Generate a reply for gr.ChatInterface using the local MKLLM model.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior turns supplied by gr.ChatInterface. Depending on the gradio
        version this is either a list of (user, assistant) pairs or a list
        of {"role": ..., "content": ...} dicts — both forms are handled.

    Returns
    -------
    str
        The model's reply text.
    """
    # Bug fix: the original ignored `history`, so every turn was answered
    # with no conversation context. Rebuild the prompt from prior turns.
    parts = []
    for turn in history:
        if isinstance(turn, dict):
            # messages-style history (newer gradio)
            role = "USER" if turn.get("role") == "user" else "ASSISTANT"
            parts.append(f"{role}: {turn.get('content', '')}")
        else:
            # (user, assistant) pair style (older gradio)
            user_msg, assistant_msg = turn
            parts.append(f"USER: {user_msg}")
            if assistant_msg:
                parts.append(f"ASSISTANT: {assistant_msg}")
    parts.append(f"USER: {message}\nASSISTANT:")
    prompt = "\n".join(parts)

    response = llm.create_completion(
        prompt,
        max_tokens=512,
        temperature=0.7,
        # Bug fix: without a stop sequence the model can continue past its
        # own answer and fabricate the next "USER:" turn in the output.
        stop=["USER:"],
    )
    return response['choices'][0]['text']

# Build the UI at module level so tools that import this module
# (e.g. `gradio app.py` hot-reload) can find `demo`.
demo = gr.ChatInterface(
    chat,
    title="MKLLM Chat",
)

# Launch the server only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()