Update README.md
Browse files
README.md
CHANGED
@@ -39,7 +39,7 @@ These are the merged version: after training the adapters, we merge the original
|
|
39 |
```python
|
40 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
41 |
# model_name = "mistralai/Mistral-7B-v0.1" # Base Model
|
42 |
-
model_name = "h-j-han/Mistral-7B-VocADT-50k-
|
43 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
44 |
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
|
45 |
prefix = "\nEnglish: Hello \nKorean: 안녕하세요 \nEnglish: Thank you\nKorean: 고맙습니다\nEnglish: "
|
@@ -49,7 +49,7 @@ prompt = prefix + line + suffix
|
|
49 |
inputs = tokenizer(prompt, return_tensors="pt")
|
50 |
for item in inputs:
|
51 |
inputs[item] = inputs[item].cuda()
|
52 |
-
outputs = model.generate(**inputs, max_new_tokens=
|
53 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
54 |
# Base Model Output: "나는 학" # This short incomplete phrase in Korean is 5 tokens for the base model.
|
55 |
# VocADT Output: "저는 학생입니다." # Complete and good output within 5 tokens
|
|
|
39 |
```python
|
40 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
41 |
# model_name = "mistralai/Mistral-7B-v0.1" # Base Model
|
42 |
+
model_name = "h-j-han/Mistral-7B-VocADT-50k-All" # Vocabulary Adapted Model
|
43 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
44 |
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
|
45 |
prefix = "\nEnglish: Hello \nKorean: 안녕하세요 \nEnglish: Thank you\nKorean: 고맙습니다\nEnglish: "
|
|
|
49 |
inputs = tokenizer(prompt, return_tensors="pt")
|
50 |
for item in inputs:
|
51 |
inputs[item] = inputs[item].cuda()
|
52 |
+
outputs = model.generate(**inputs, max_new_tokens=5)
|
53 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
54 |
# Base Model Output: "나는 학" # This short incomplete phrase in Korean is 5 tokens for the base model.
|
55 |
# VocADT Output: "저는 학생입니다." # Complete and good output within 5 tokens
|