Spaces:
Sleeping
Sleeping
File size: 4,196 Bytes
030905d 94dfd3b 030905d 94dfd3b 030905d 8e1b31f 030905d 8e1b31f 030905d 5154cbe aa18d80 030905d 94dfd3b 030905d 94dfd3b 030905d 94dfd3b 030905d 8225915 030905d 94dfd3b 030905d 71c52b0 030905d d1445bf 94dfd3b 030905d 15ccb8c babb611 030905d 94dfd3b 030905d 94dfd3b 030905d 94dfd3b 030905d 8e1b31f 030905d 94dfd3b 030905d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import torch
from PIL import Image
import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import os
from threading import Thread
HF_TOKEN = os.environ.get("HF_TOKEN", None)
MODEL_ID = "kodetr/hukum-indo-qa-v1"
MODELS = os.environ.get("MODELS")
TITLE = "<h1><center>KONSULTASI HUKUM INDONESIA</center></h1>"
DESCRIPTION = f"""
<center>
<p>
Developed By Tanwir
</p>
</center>
"""
CSS = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
}
"""
model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
torch_dtype=torch.bfloat16,
device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
@spaces.GPU
def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int, top_p: float, top_k: int, penalty: float):
print(f'message is - {message}')
print(f'history is - {history}')
conversation = [{"role": "system", "content": 'Di bawah ini adalah instruksi yang menjelaskan suatu tugas. Tulis respons yang menyelesaikan permintaan dengan tepat.'}]
for prompt, answer in history:
conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
conversation.append({"role": "user", "content": message})
print(f"Conversation is -\n{conversation}")
input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(input_ids, return_tensors="pt").to('cpu') #gpu 0, cpu 1
streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)
generate_kwargs = dict(
inputs,
streamer=streamer,
top_k=top_k,
top_p=top_p,
repetition_penalty=penalty,
max_new_tokens=max_new_tokens,
do_sample=True,
temperature=temperature,
pad_token_id=128000,
eos_token_id=[128001,128008,128009],
)
thread = Thread(target=model.generate, kwargs=generate_kwargs)
thread.start()
buffer = ""
for new_text in streamer:
buffer += new_text
yield buffer
chatbot = gr.Chatbot(height=600)
with gr.Blocks(css=CSS) as demo:
gr.HTML(TITLE)
gr.HTML(DESCRIPTION)
gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")
gr.ChatInterface(
fn=stream_chat,
chatbot=chatbot,
fill_height=True,
additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
additional_inputs=[
gr.Slider(
minimum=0,
maximum=1,
step=0.1,
value=0.8,
label="Temperature",
render=False,
),
gr.Slider(
minimum=128,
maximum=4096,
step=1,
value=1024,
label="Max new tokens",
render=False,
),
gr.Slider(
minimum=0.0,
maximum=1.0,
step=0.1,
value=0.8,
label="top_p",
render=False,
),
gr.Slider(
minimum=1,
maximum=20,
step=1,
value=20,
label="top_k",
render=False,
),
gr.Slider(
minimum=0.0,
maximum=2.0,
step=0.1,
value=1.0,
label="Repetition penalty",
render=False,
),
],
# examples=[
# ["Apa yang dimaksud tentang Stunting?"],
# ["Apa saja tanda-tanda anak mengalami stunting?"],
# ["Apa saja makanan yang bisa mencegah stunting?"],
# ["Bagaimana malnutrisi dapat mempengaruhi perkembangan otak anak?"],
# ],
# cache_examples=False,
)
if __name__ == "__main__":
demo.launch() |