Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
from transformers import GemmaTokenizer, AutoModelForCausalLM
|
| 6 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
| 7 |
+
from threading import Thread
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Page header, rendered as Markdown/HTML above the chat interface.
DESCRIPTION = '''
<div>
<h1 style="text-align: center;">Indonesian Legal Assistant</h1>
</div>
'''

# Footer note, rendered as Markdown below the chat interface.
LICENSE = """
<p/>
---
Built with Deepseek Qwen3
"""

# HTML passed to the Chatbot component as its placeholder.
PLACEHOLDER = """
<div style="padding: 30px; text-align: center; display: flex; flex-direction: column; align-items: center;">
<h1 style="font-size: 28px; margin-bottom: 2px; opacity: 0.55;">DeepSeek-R1-0528-Qwen3-8B</h1>
<p style="font-size: 18px; margin-bottom: 2px; opacity: 0.65;">Finetuned with Indonesian Legal Consulting Data</p>
</div>
"""


# Custom stylesheet handed to gr.Blocks(css = ...).
css = """
h1 {
text-align: center;
display: block;
}
#duplicate-button {
margin: auto;
color: white;
background: #1565c0;
border-radius: 100vh;
}
"""
|
| 42 |
+
|
| 43 |
+
# Load the tokenizer and model once at import time; chat() reuses these module-level objects.
model_name = 'Azzindani/Deepseek_ID_Legal_Preview'

# NOTE(review): trust_remote_code = True allows Python code shipped with the model
# repository to run locally; keep it only while that repository is trusted.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code = True)

# device_map = 'auto' leaves device placement to the loader, so the model is not moved
# again with model.to(...); chat() sends its inputs to model.device instead.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map = 'auto', trust_remote_code = True, torch_dtype = torch.float16)
|
| 49 |
+
|
| 50 |
+
# Instructions sent to the model as the system turn of every conversation.
_SYSTEM_PROMPT = """
Anda adalah asisten AI yang ahli di bidang hukum Indonesia. Anda dapat membantu konsultasi hukum, menjawab pertanyaan, dan memberikan analisis berdasarkan peraturan perundang-undangan yang relevan.
Untuk setiap respons, Anda harus berfikir dan menjawab dengan Bahasa Indonesia, serta gunakan format:
<think>
...
</think>
Tuliskan jawaban akhir secara jelas, ringkas, profesional, dan berempati jika diperlukan. Gunakan bahasa hukum yang mudah dipahami. Sertakan referensi hukum Indonesia yang relevan. Selalu rekomendasikan konsultasi dengan ahli hukum untuk keputusan final.
"""

# Literal tags the model uses to delimit its reasoning.
_THINK_OPEN = '<think>'
_THINK_CLOSE = '</think>'

# UI-only markup wrapped around the streamed and final replies.
_THINKING_HEADER = '\n🧠 **Sedang berfikir...**\n'
_ANSWERING_HEADER = '\n\n-----\n✅ **Sedang menjawab:**\n'
_DIRECT_HEADER = '\n⏭️ **Jawaban langsung:**\n\n'
_DETAILS_OPEN = '<details><summary>🧠 <b>Proses berfikir (klik untuk melihat)</b></summary>\n\n'
_DETAILS_CLOSE = '\n</details>\n\n'
_FINAL_HEADER = '-----\n✅ **Jawaban:**\n'

# Core text of each answer header, without surrounding newlines/dashes, so stored replies
# can be recognised even if their surrounding whitespace was normalised.
_ANSWER_MARKERS = tuple(
    header.strip('\n-') for header in (_FINAL_HEADER, _ANSWERING_HEADER, _DIRECT_HEADER)
)

# Number of characters to wait for a think tag before treating the reply as a direct answer.
_FORMAT_PROBE_CHARS = 20


def _strip_display_markup(reply):
    """Return only the answer part of a reply that was previously rendered for the UI."""
    for marker in _ANSWER_MARKERS:
        _, found, answer = reply.partition(marker)
        if found:
            return answer.lstrip('\n')
    return reply


def _build_conversation(message, history):
    """Build the chat-template message list from the system prompt, past turns and `message`."""
    conversation = [{'role' : 'system', 'content' : _SYSTEM_PROMPT}]
    for turn in history or []:
        if isinstance(turn, dict):
            # Role/content dictionaries.
            pairs = [(turn.get('role'), turn.get('content'))]
        else:
            # (user, assistant) pairs.
            user_msg, assistant_msg = turn
            pairs = [('user', user_msg), ('assistant', assistant_msg)]
        for role, content in pairs:
            if not isinstance(role, str) or not isinstance(content, str):
                continue  # skip missing or non-text entries
            if role == 'assistant':
                # The model should see its earlier answers, not the UI decoration.
                content = _strip_display_markup(content)
            conversation.append({'role' : role, 'content' : content})
    conversation.append({'role' : 'user', 'content' : message})
    return conversation


def _trim_partial_tag(text):
    """Drop a trailing fragment that may be the start of a think tag still being streamed."""
    for tag in (_THINK_CLOSE, _THINK_OPEN):
        for size in range(len(tag) - 1, 0, -1):
            if text.endswith(tag[:size]):
                return text[:-size]
    return text


def _split_thinking(text):
    """Split raw model output into (thinking, answer, saw_tag, thinking_done)."""
    head, opened, body = text.partition(_THINK_OPEN)
    if opened:
        thinking, closed, tail = body.partition(_THINK_CLOSE)
        answer = head + tail
    else:
        # A closing tag without an opening one: everything before it is reasoning.
        thinking, closed, tail = text.partition(_THINK_CLOSE)
        if not closed:
            return '', text, False, False
        answer = tail
    # Any further stray tags are not meant for display.
    answer = answer.replace(_THINK_OPEN, '').replace(_THINK_CLOSE, '')
    return thinking, answer, True, bool(closed)


def _render_reply(raw, finished, show_thinking):
    """Format the accumulated model output `raw` for display in the chat window."""
    visible = raw if finished else _trim_partial_tag(raw)
    thinking, answer, saw_tag, thinking_done = _split_thinking(visible)

    if not saw_tag:
        if not finished and len(visible) <= _FORMAT_PROBE_CHARS:
            return visible  # too early to tell whether a think block follows
        return _DIRECT_HEADER + answer

    if finished:
        if show_thinking and thinking.strip():
            return _DETAILS_OPEN + thinking + _DETAILS_CLOSE + _FINAL_HEADER + answer
        return _FINAL_HEADER + answer

    parts = [_THINKING_HEADER]
    if show_thinking:
        parts.append(thinking)
    if thinking_done:
        parts.append(_ANSWERING_HEADER)
        parts.append(answer)
    return ''.join(parts)


def chat(message, history, temperature = 0.7, max_new_tokens = 1024, show_thinking = True):
    """Stream the assistant's reply to `message` as progressively longer display strings.

    Args:
        message: The new user message.
        history: Earlier turns, either (user, assistant) pairs or role/content
            dictionaries. It is read, never modified.
        temperature: Sampling temperature; 0 (or less) selects greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        show_thinking: When False, the text inside the think block is withheld from
            both the streamed and the final output.

    Yields:
        The full text to display so far. The last value is the final reply: the
        reasoning (if shown) in a collapsible block followed by the answer, or an
        error message if input preparation or generation failed.
    """
    conversation = _build_conversation(message, history)

    try:
        input_ids = tokenizer.apply_chat_template(
            conversation,
            tokenize = True,
            add_generation_prompt = True,
            return_tensors = 'pt'
        ).to(model.device)
    except Exception as e:
        # This function is a generator, so a returned value would be discarded;
        # the message has to be yielded to reach the user.
        yield f"Error preparing input: {str(e)}"
        return

    streamer = TextIteratorStreamer(tokenizer, skip_prompt = True, skip_special_tokens = True)
    generate_kwargs = {
        'input_ids' : input_ids,
        'streamer' : streamer,
        'max_new_tokens' : int(max_new_tokens),
    }
    if temperature > 0:
        generate_kwargs.update({
            'do_sample' : True,
            'temperature' : temperature,
            'top_p' : 0.95,
            'top_k' : 20,
        })
    else:
        # Greedy decoding: sampling parameters do not apply.
        generate_kwargs['do_sample'] = False

    failures = []

    def _generate_safely():
        try:
            model.generate(**generate_kwargs)
        except Exception as exc:
            failures.append(exc)
            # Signal end-of-stream so the consuming loop below does not block forever.
            streamer.end()

    # Run generation in a thread so the text can be consumed while it is produced.
    thread = Thread(target = _generate_safely)
    thread.start()

    raw = ''
    for new_text in streamer:
        raw += new_text
        yield _render_reply(raw, finished = False, show_thinking = show_thinking)
    thread.join()

    if failures:
        yield f"Error during generation: {failures[0]}"
        return

    if raw:
        yield _render_reply(raw, finished = True, show_thinking = show_thinking)
|
| 170 |
+
|
| 171 |
+
# Gradio block
# The chat display is created up front and handed to ChatInterface below.
chatbot = gr.Chatbot(height = 500, placeholder = PLACEHOLDER, label = 'Legal Assistant')

with gr.Blocks(fill_height = True, css = css) as demo:

    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn = chat,
        chatbot = chatbot,
        fill_height = True,
        additional_inputs_accordion = gr.Accordion(label = '⚙️ Parameters', open = False, render = False),
        # Extra inputs for chat(), in order: temperature, then max_new_tokens.
        additional_inputs = [
            gr.Slider(minimum = 0,
                      maximum = 1,
                      step = 0.1,
                      value = 0.6,
                      label = 'Temperature',
                      render = False),
            gr.Slider(minimum = 128,
                      maximum = 4096,
                      step = 1,
                      value = 2048,
                      label = 'Max new tokens',
                      render = False),
        ],
        # Sample legal questions offered to the user.
        examples = [
            ['Saya seorang CPNS di lingkungan Kementerian Hukum dan HAM RI, bekerja dengan sistem (anggota regu pengamanan lapas). Lazimnya PNS dengan jabatan yang sama diberikan libur kerja selama 2 hari. Saya selaku CPNS diwajibkan untuk melaksanakan dinas pada hari libur ke-2 secara berkala tanpa diberikan perintah secara tertulis/surat perintah. Apakah boleh CPNS bekerja dengan jam kerja yang sama dengan PNS? Dan Apakah CPNS boleh bekerja di luar aturan kerja yang sudah ditentukan?'],
            ['Mohon bantu jelaskan apa itu politik luar negeri? Indonesia menganut konsep politik luar negeri apa? Apa dasar hukum politik luar negeri Indonesia?'],
            ['Saya adalah istri kedua dari pernikahan siri, yang mana pernikahannya dilaksanakan tanpa sepengetahuan istri pertama. Dari pernikahan ini kami telah dikaruniai anak. Namun, saya ingin menggugat cerai suami. Bisakah istri siri menggugat cerai atau minta cerai? Bagaimana caranya?'],
            ['Apa yang dimaksud dengan bank perantara dan bank kustodian?Bagaimana langkah untuk meminta eksekusi dari putusan arbitrase nasional? Bisakah kita mengajukan eksekusi terhadap harta salah satu pihak yang tidak disebutkan dalam putusan arbitrase?'],
            ['Adakah hukumnya yang mengatur pembagian persentase/laba dalam mendirikan suatu perusahaan? Dan berapa persenkah yang didapat oleh si pemilik ide untuk mendirikan perusahaan, jika dia tidak menyetor modal sedikit pun atau hanya menjalankan saja?'],
        ],
        cache_examples = False,
    )

    gr.Markdown(LICENSE)

if __name__ == '__main__':
    demo.launch()
|