import gradio as gr

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
)

from accelerate import disk_offload


# Load the model with automatic dtype selection and device placement.
model = AutoModelForCausalLM.from_pretrained(
    'DuongTrongChi/Rikka-1.8B-v2.2',
    torch_dtype="auto",
    device_map="auto"
)

# Offload the model weights to disk; they are streamed back in during the forward pass.
disk_offload(model=model, offload_dir="offload")


tokenizer = AutoTokenizer.from_pretrained('DuongTrongChi/Rikka-1.8B-v2.2')
# Use the ChatML end-of-turn token "<|im_end|>" as the stop/pad token during generation.
eos_token = tokenizer("<|im_end|>", add_special_tokens=False)["input_ids"][0]
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)


def chat(message, history=None):
    def chat_template(message):
        # System prompt (Vietnamese): "You are a helpful, respectful and honest
        # assistant. Always answer as helpfully as possible while remaining safe."
        system_prompt = """Bạn là một trợ lý hữu ích, tôn trọng và trung thực. Luôn trả lời một cách hữu ích nhất có thể trong khi vẫn an toàn. """
        return [
            {"role": "system", "content": system_prompt},
            {"role": "question", "content": message}
        ]

    # Render the messages with the model's chat template and generate a reply.
    prompt = pipe.tokenizer.apply_chat_template(chat_template(message), tokenize=False, add_generation_prompt=True)
    outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.3, top_k=50, top_p=0.95, eos_token_id=eos_token, pad_token_id=eos_token)

    # Return only the newly generated text, without the prompt prefix.
    return outputs[0]['generated_text'][len(prompt):].strip()


# Launch the Gradio chat UI with chat() as the response function.
gr.ChatInterface(chat).launch()
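# A minimal sketch for debugging without the web UI (assumption: you want a single
# reply printed to the console; the example message is hypothetical). Replace the
# launch() call above with something like:
#     print(chat("Hello, what can you help me with?"))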