File size: 4,196 Bytes
030905d
 
94dfd3b
030905d
 
 
 
 
94dfd3b
030905d
8e1b31f
030905d
 
8e1b31f
030905d
 
 
5154cbe
aa18d80
030905d
 
94dfd3b
030905d
 
 
 
 
 
 
 
 
 
 
94dfd3b
 
030905d
 
 
 
 
 
94dfd3b
030905d
 
 
 
8225915
030905d
 
 
94dfd3b
030905d
 
 
71c52b0
030905d
d1445bf
94dfd3b
030905d
 
 
 
 
 
 
 
 
15ccb8c
babb611
030905d
 
 
 
94dfd3b
030905d
 
 
 
94dfd3b
 
 
030905d
94dfd3b
030905d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8e1b31f
 
 
 
 
 
 
030905d
94dfd3b
 
 
030905d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import torch
from PIL import Image
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import os
from threading import Thread


# Values read from the process environment; each is None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
MODELS = os.getenv("MODELS")

# Hub repository id used for both the model weights and the tokenizer.
MODEL_ID = "kodetr/hukum-indo-qa-v1"

# Raw HTML snippets shown at the top of the page.
TITLE = "<h1><center>KONSULTASI HUKUM INDONESIA</center></h1>"

DESCRIPTION = """
<center>
<p>
Developed By Tanwir
</p>
</center>
"""

# Stylesheet handed to gr.Blocks: styles the duplicate button and centres <h3>.
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""

# Both objects are created once, at import time, and shared by every request.
# The weights are loaded in bfloat16 and placed via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
# Tokenizer comes from the same repository as the model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

@spaces.GPU
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
    """Stream the model's reply to ``message`` as a progressively longer string.

    Builds a transcript (fixed system prompt, the prior turns in ``history``,
    then ``message``), renders it with the tokenizer's chat template, and runs
    ``model.generate`` on a background thread while this generator relays the
    text produced by a ``TextIteratorStreamer``.

    Args:
        message: The newest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs.
        temperature: Sampling temperature; ``0`` selects greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        penalty: Repetition penalty forwarded to ``generate``.

    Yields:
        The reply text accumulated so far, once per streamed chunk.
    """
    print(f'message is - {message}')
    print(f'history is - {history}')
    conversation = [{"role": "system", "content": 'Di bawah ini adalah instruksi yang menjelaskan suatu tugas. Tulis respons yang menyelesaikan permintaan dengan tepat.'}]
    for prompt, answer in history:
        conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
    conversation.append({"role": "user", "content": message})

    print(f"Conversation is -\n{conversation}")

    # Render the transcript to a prompt string first, then tokenize it.
    prompt_text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
    # The model is placed by device_map="auto", so follow whatever device it
    # ended up on instead of pinning the inputs to the CPU (which mismatches
    # as soon as the weights live on a GPU).
    inputs = tokenizer(prompt_text, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)

    # NOTE(review): UI sliders may hand back ints for whole-number positions;
    # normalise so generate() always receives floats for these two settings.
    temperature = float(temperature)
    penalty = float(penalty)

    # Sampling needs a strictly positive temperature; the UI allows 0, which is
    # treated here as a request for deterministic (greedy) decoding.
    use_sampling = temperature > 0

    generate_kwargs = dict(
        inputs,
        streamer=streamer,
        repetition_penalty=penalty,
        max_new_tokens=max_new_tokens,
        do_sample=use_sampling,
        pad_token_id=128000,
        eos_token_id=[128001, 128008, 128009],
    )
    if use_sampling:
        # Sampling-only knobs are passed only when they are actually in effect.
        generate_kwargs.update(top_k=top_k, top_p=top_p, temperature=temperature)

    # generate() blocks until completion, so it runs on a worker thread while
    # this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer



# Chat transcript widget; created up front and handed to the ChatInterface below.
chatbot = gr.Chatbot(height=600)

# Page layout: title, description, a "duplicate" button, then the chat
# interface whose extra sliders are forwarded to stream_chat in this order:
# temperature, max_new_tokens, top_p, top_k, penalty.
with gr.Blocks(css=CSS) as demo:
    gr.HTML(TITLE)
    gr.HTML(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
        additional_inputs=[
            gr.Slider(
                minimum=0,
                maximum=1,
                step=0.1,
                value=0.8,
                label="Temperature",
                render=False,
            ),
            gr.Slider(
                minimum=128,
                maximum=4096,
                step=1,
                value=1024,
                label="Max new tokens",
                render=False,
            ),
            gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.8,
                label="top_p",
                render=False,
            ),
            gr.Slider(
                minimum=1,
                maximum=20,
                step=1,
                value=20,
                label="top_k",
                render=False,
            ),
            gr.Slider(
                # A repetition penalty must be strictly positive, so the
                # slider starts at 0.1 rather than 0.0 (1.0 means "no penalty").
                minimum=0.1,
                maximum=2.0,
                step=0.1,
                value=1.0,
                label="Repetition penalty",
                render=False,
            ),
        ],
    )


# Start the web app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()