Fine-tuned from Sakura-14B-Qwen2beta-Base-v2 on Korean light-novel translation data (parallel Korean and Chinese translations of 713 Japanese light novels, plus Chinese translations of 14 Korean light novels).

The model only supports Korean → Simplified Chinese translation.

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

model_path = 'SakuraLLM/LN-Korean-14B-v0.2.1'
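# Load the tokenizer and model; device_map='auto' places the weights on the available GPU(s)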
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_path, device_map='auto', trust_remote_code=True).eval()
model.generation_config = GenerationConfig.from_pretrained(model_path, trust_remote_code=True)

# ๆฎต่ฝไน‹้—ด็”จ\nๅˆ†้š”
text = '''์—ฌ์ž์• ๋“ค์ด ์ž์‹ ๋“ค์˜ ์ฒซ ๊ฒฝํ—˜์— ๋Œ€ํ•œ ์ด์•ผ๊ธฐ๋ฅผ ํ•˜๋Š” ๊ฑธ ๋“ค์€ ์ ์ด ์žˆ๋Š”๊ฐ€.
๋ฌผ๋ก  ์—ฌ๊ธฐ์„œ ์ฒซ ๊ฒฝํ—˜์ด๋ผ๋Š” ๊ฒƒ์€ ์ฒ˜์Œ์œผ๋กœ ์•ผ์ž๋ฅผ ์จŒ๋‹ค๋“ ๊ฐ€ ์ฒ˜์Œ์œผ๋กœ ์ˆ ์„ ๋งˆ์…” ๋ดค๋‹ค๋“ ๊ฐ€ ๊ทธ๋Ÿฐ ๊ฒƒ์ด ์•„๋‹ˆ๋ผ, ๋ช…์‹ค๊ณตํžˆ ๊ทธ๋ ‡๊ณ  ๊ทธ๋Ÿฐ ์˜๋ฏธ์—์„œ์˜ ์ฒซ ๊ฒฝํ—˜์ด๋‹ค.
โ€œ์šฐ, ์šฐ๋ฆฌ๊ฐ€โ€ฆโ€ฆ ์ฒ˜์Œ์œผ๋กœ ๊ทธ, ๊ทธ๊ฑธ ํ•œ ๊ฑฐ๋Š” ๋ง์ด์•ผ.โ€
๊ทธ๋ ‡๊ฒŒ ๋งํ•œ ๊ฒƒ์€ ์†ŒํŒŒ์— ์•‰์•„ ์žˆ๋Š” ๊ฐˆ์ƒ‰ ๊ต๋ณต์˜ ์†Œ๋…€์˜€๋‹ค. ๋‘ฅ๊ทผ ์–ผ๊ตด์— ์ปค๋‹ค๋ž€ ๊ฐˆ์ƒ‰ ๋ˆˆ๋™์ž๋ฅผ ์ง€๋‹Œ, ๋ถ€๋“œ๋Ÿฌ์šด ๋จธ๋ฆฌ์นด๋ฝ์„ ์–ด๊นจ ์œ„๋กœ ๋Š˜์–ด๋œจ๋ฆฌ๊ณ  ์žˆ๋Š” ์†Œ๋…€๋‹ค. ์ „๋ฐ˜์ ์œผ๋กœ ์–Œ์ „ํ•œ ๋ชจ๋ฒ”์ƒ ๊ฐ™์•„ ๋ณด์ด๋Š” ์ธ์ƒ์ด๊ณ  ๋ชธ์ง‘๋„ ์•„๋‹ดํ•œ ํŽธ์ด์ง€๋งŒ, ๊ต๋ณต ์ƒ์˜๋ฅผ ๋งคํ˜น์ ์œผ๋กœ ๋ถ€ํ’€์–ด ์˜ค๋ฅด๊ฒŒ ํ•˜๊ณ  ์žˆ๋Š” ๊ฐ€์Šด๋งŒํผ์€ ์–Œ์ „ํ•˜์ง€๋„ ์•„๋‹ดํ•˜์ง€๋„ ์•Š์•˜๋‹ค. ๋ชธ์„ ์›€์ธ ๋ฆฐ ์ž์„ธ ํƒ“์— ๋‘ ํŒ”์ด ๊ฐ€์Šด์„ ์–‘์˜†์—์„œ ์••๋ฐ•ํ•˜๊ณ  ์žˆ์–ด, ๋ชธ์„ ์›€์ง์ผ ๋•Œ๋งˆ๋‹ค ๊ทธ ์œค๊ณฝ์ด ๋ถ€๋“œ๋Ÿฝ๊ฒŒ ์ผ๊ทธ๋Ÿฌ์กŒ๋‹ค.'''

# Keep the input text under 1024 characters
assert len(text) < 1024

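# Wrap the Korean source text in a single user message with the translation prompt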
messages = [
    {'role': 'user', 'content': f'็ฟป่ฏ‘ๆˆไธญๆ–‡๏ผš\n{text}'}
]

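# Render the messages with the model's chat template and append the assistant generation prompt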
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)

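# Tokenize the prompt and move the tensors to the GPU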
model_inputs = tokenizer([text], return_tensors='pt').to('cuda')

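# Generate the translation, allowing up to 1024 new tokens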
generated_ids = model.generate(
    model_inputs.input_ids,
    max_new_tokens=1024
)

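# Strip the prompt tokens so only the newly generated tokens are decoded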
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(response)
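
Because the input is capped at 1024 characters, longer texts (for example a whole chapter) have to be split before translation. The sketch below is not part of the official example: it reuses the model, tokenizer, and prompt from above and simply packs paragraphs into chunks that stay under the limit. The helper names translate_chunk and split_paragraphs and the chapter.txt path are illustrative assumptions.

from pathlib import Path

def translate_chunk(chunk: str) -> str:
    # Same prompt format and generation call as the example above.
    messages = [{'role': 'user', 'content': f'็ฟป่ฏ‘ๆˆไธญๆ–‡๏ผš\n{chunk}'}]
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([prompt], return_tensors='pt').to(model.device)
    output_ids = model.generate(inputs.input_ids, max_new_tokens=1024)
    new_tokens = output_ids[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

def split_paragraphs(full_text: str, limit: int = 1024) -> list:
    # Greedily pack whole paragraphs into chunks shorter than `limit` characters.
    # Assumes no single paragraph exceeds the limit on its own.
    chunks, current = [], ''
    for para in full_text.split('\n'):
        candidate = f'{current}\n{para}' if current else para
        if len(candidate) < limit:
            current = candidate
        else:
            if current:
                chunks.append(current)
            current = para
    if current:
        chunks.append(current)
    return chunks

# 'chapter.txt' is a placeholder for a file holding the full Korean source text.
long_text = Path('chapter.txt').read_text(encoding='utf-8')
translated = '\n'.join(translate_chunk(chunk) for chunk in split_paragraphs(long_text))
print(translated)

Translating chunk by chunk keeps every request within the 1024-character limit enforced by the assert above, and the results are joined back with newlines, matching the paragraph-separator convention of the input.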