Maybe I'm missing something but I can't get coherent outputs
#2
opened by wassname
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
model_id="v2ray/GPT4chan-8B"
model = AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16
).to("cuda").eval()
tokenizer = AutoTokenizer.from_pretrained(model_id)
# jinja2 template for tokeniser formatting
template = """g
{% for message in messages -%}
<|start_header_id|>{{ loop.index }}<|end_header_id|>
{{ message['content'] }}
{% endfor -%}
{% if add_generation_prompt and messages[-1]['role'] != 'assistant' %}
<|start_header_id|>{{ messages | length +1 }}<|end_header_id|>
{% endif %}
"""
messages = [
{"role": "user", "content": "speculate thread\nwhat will ai land be like in 2025"},
]
tokenizer.chat_template = template
s = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
print("Formatted message\n", s)
with torch.no_grad():
    inputs = tokenizer([s], return_tensors="pt").to("cuda")
    r = model.generate(
        **inputs,
        max_length=1000000,  # effectively "generate until EOS"
        pad_token_id=tokenizer.eos_token_id,
        early_stopping=False,
        min_new_tokens=32,
    )
o = tokenizer.decode(r[0], skip_special_tokens=False)
o = o.replace("<|start_header_id|>", "\n<|start_header_id|>")
print("Input and generate\n", o)
<|begin_of_text|>
<|start_header_id|>1<|end_header_id|>
speculate thread
what will ai land be like in 2025
<|start_header_id|>2<|end_header_id|>
>AI will take over the world
>it will be bad
I don't care
<|start_header_id|>3<|end_header_id|>>>2
i do care
<|start_header_id|>4<|end_header_id|>>>1 (OP)
>what will ai land be like in 2025
A bunch of b
Ah, never mind, I think I got it. It was sensitive to the exact template. I've updated the above to working code and will leave it here for others to use/improve.
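In case it helps anyone building on this, here's a minimal sketch of how you might cap generation and pull out just the newly generated post instead of re-printing the whole prompt. The helper name generate_reply, the 256-token cap, and the splitting on the header token are my own choices, not anything from the model card.

def generate_reply(messages, max_new_tokens=256):
    # render the thread with the same chat template set above
    prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=max_new_tokens,
                             pad_token_id=tokenizer.eos_token_id)
    # keep only the new tokens, i.e. drop the prompt that was fed in
    new_tokens = out[0][inputs["input_ids"].shape[1]:]
    reply = tokenizer.decode(new_tokens, skip_special_tokens=False)
    # cut at the next post header if the model starts another post, and drop the EOS marker
    return reply.split("<|start_header_id|>")[0].replace(tokenizer.eos_token, "").strip()

reply = generate_reply(messages)
print(reply)

To keep a thread going you could append the reply to messages and call it again; just note the template above only adds the trailing post header when the last message's role isn't 'assistant'.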
wassname changed discussion status to closed