Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -55,7 +55,7 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
|
|
55 |
input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
|
56 |
inputs = tokenizer(input_ids, return_tensors="pt").to("cpu") #GPU 0, CPU 1
|
57 |
|
58 |
-
streamer = TextIteratorStreamer(tokenizer, timeout=
|
59 |
|
60 |
generate_kwargs = dict(
|
61 |
inputs,
|
@@ -66,8 +66,8 @@ def stream_chat(message: str, history: list, temperature: float, max_new_tokens:
|
|
66 |
max_new_tokens=max_new_tokens,
|
67 |
do_sample=True,
|
68 |
temperature=temperature,
|
69 |
-
eos_token_id=
|
70 |
-
pad_token_id=
|
71 |
)
|
72 |
|
73 |
thread = Thread(target=model.generate, kwargs=generate_kwargs)
|
|
|
55 |
input_ids = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
|
56 |
inputs = tokenizer(input_ids, return_tensors="pt").to("cpu") #GPU 0, CPU 1
|
57 |
|
58 |
+
streamer = TextIteratorStreamer(tokenizer, timeout=60., skip_prompt=True, skip_special_tokens=True)
|
59 |
|
60 |
generate_kwargs = dict(
|
61 |
inputs,
|
|
|
66 |
max_new_tokens=max_new_tokens,
|
67 |
do_sample=True,
|
68 |
temperature=temperature,
|
69 |
+
eos_token_id=128000,
|
70 |
+
pad_token_id=128000
|
71 |
)
|
72 |
|
73 |
thread = Thread(target=model.generate, kwargs=generate_kwargs)
|