Chris Alexiuk
committed on
Commit
·
3f8a366
1
Parent(s):
e1f8031
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
20 |
device_map={"": 0}
|
21 |
)
|
22 |
|
23 |
-
model = PeftModel.from_pretrained(
|
24 |
model = m.merge_and_unload()
|
25 |
tokenizer = LlamaTokenizer.from_pretrained(model_name)
|
26 |
tokenizer.bos_token_id = 1
|
@@ -98,9 +98,9 @@ def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id)
|
|
98 |
messages = convert_history_to_text(history)
|
99 |
|
100 |
# Tokenize the messages string
|
101 |
-
input_ids =
|
102 |
-
input_ids = input_ids.to(
|
103 |
-
streamer = TextIteratorStreamer(
|
104 |
generate_kwargs = dict(
|
105 |
input_ids=input_ids,
|
106 |
max_new_tokens=max_new_tokens,
|
@@ -116,7 +116,7 @@ def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id)
|
|
116 |
stream_complete = Event()
|
117 |
|
118 |
def generate_and_signal_complete():
|
119 |
-
|
120 |
stream_complete.set()
|
121 |
|
122 |
def log_after_stream_complete():
|
|
|
20 |
device_map={"": 0}
|
21 |
)
|
22 |
|
23 |
+
model = PeftModel.from_pretrained(model, adapters_name)
|
24 |
model = m.merge_and_unload()
|
25 |
tokenizer = LlamaTokenizer.from_pretrained(model_name)
|
26 |
tokenizer.bos_token_id = 1
|
|
|
98 |
messages = convert_history_to_text(history)
|
99 |
|
100 |
# Tokenize the messages string
|
101 |
+
input_ids = tokenizer(messages, return_tensors="pt").input_ids
|
102 |
+
input_ids = input_ids.to(model.device)
|
103 |
+
streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
|
104 |
generate_kwargs = dict(
|
105 |
input_ids=input_ids,
|
106 |
max_new_tokens=max_new_tokens,
|
|
|
116 |
stream_complete = Event()
|
117 |
|
118 |
def generate_and_signal_complete():
|
119 |
+
model.generate(**generate_kwargs)
|
120 |
stream_complete.set()
|
121 |
|
122 |
def log_after_stream_complete():
|