FlameF0X committed
Commit e76f2f8 · verified · 1 Parent(s): 02126b9

Create app.py

Files changed (1):
  1. app.py +84 -0
app.py ADDED
@@ -0,0 +1,84 @@
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
import torch
from threading import Thread

# Load model and tokenizer
model_name = "GoofyLM/BrainrotLM-Assistant"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
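
# Note: device_map="auto" relies on the `accelerate` package for weight
# placement, and torch_dtype=torch.float16 roughly halves memory use versus
# float32 (on CPU-only hardware, loading in float32 may be the safer choice).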

# Set pad token if missing
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Define a custom chat template if one is not available
if tokenizer.chat_template is None:
    # Basic ChatML-style template
    tokenizer.chat_template = "{% for message in messages %}\n{% if message['role'] == 'system' %}<|system|>\n{{ message['content'] }}\n{% elif message['role'] == 'user' %}<|user|>\n{{ message['content'] }}\n{% elif message['role'] == 'assistant' %}<|assistant|>\n{{ message['content'] }}\n{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}<|assistant|>\n{% endif %}"
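
# With this template, a single-turn conversation renders roughly as:
#
#   <|system|>
#   You are BrainrotLM, an AI assistant.
#   <|user|>
#   Hello!
#   <|assistant|>
#
# The trailing <|assistant|> tag (added when add_generation_prompt=True)
# cues the model to begin an assistant turn.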

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
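    # gr.ChatInterface calls this generator with the new user message, the
    # history as (user, assistant) string pairs (per the type hint above),
    # and one value per additional_inputs component defined further below.
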
    # Build conversation messages
    messages = [{"role": "system", "content": system_message}]

    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    # Format prompt using chat template
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Set up streaming
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
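
    # skip_prompt=True keeps the echoed input prompt out of the streamed
    # output, and skip_special_tokens=True drops markers such as the EOS token.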

    # Configure generation parameters
    do_sample = temperature > 0 or top_p < 1.0
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=do_sample,
        pad_token_id=tokenizer.pad_token_id
    )
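
    # do_sample is effectively always True here, since the temperature
    # slider's minimum is 0.1; with do_sample=False, transformers ignores
    # temperature and top_p and decodes greedily.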

    # Start generation in a separate thread
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
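
    # model.generate() blocks until generation finishes, so it runs in this
    # background thread while the streamer yields decoded tokens on this one.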

    # Stream response
    response = ""
    for token in streamer:
        response += token
        yield response
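
    # Each yield replaces the assistant message shown in the chat window, so
    # streaming the cumulative `response` string gives a typewriter effect.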
# Create Gradio interface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are BrainrotLM, an AI assistant.", label="System message"),
        gr.Slider(1, 2048, value=72, label="Max new tokens"),
        gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
        gr.Slider(0.1, 1.0, value=0.95, label="Top-p (nucleus sampling)"),
    ],
)
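
# The four additional_inputs map positionally onto respond()'s extra
# parameters: system_message, max_tokens, temperature, and top_p.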

if __name__ == "__main__":
    demo.launch()