import torch
from session import logger, log_sys_info
from transformers import AutoTokenizer, AutoModel

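# Hub repo IDs and pinned revisions, so downloads stay reproducible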
chatglm = 'THUDM/chatglm-6b'
chatglm_rev = '4de8efe'
int8_model = 'KumaTea/twitter-int8'
int8_model_rev = '1136001'
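
# Earlier workarounds, kept commented out for reference: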
# import subprocess
# result = subprocess.run(['git', 'clone', 'https://huggingface.co/KumaTea/twitter-int8', 'model'], capture_output=True, text=True)
# print(result.stdout)
# device = torch.device('cpu')
# torch.cuda.current_device = lambda: device

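# Log system resources before the model is loaded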
log_sys_info()

# We have to use full precision (.float()), as some token ids are > 65535
model = AutoModel.from_pretrained(
    int8_model,
    trust_remote_code=True,
    revision=int8_model_rev
).float()  # .to(device)
tokenizer = AutoTokenizer.from_pretrained(chatglm, trust_remote_code=True, revision=chatglm_rev)

# Dump a log to ensure everything works well:
# print(model.peft_config)
# print(model)

model.eval()
# New tensors default to CPU float32 from here on
torch.set_default_tensor_type(torch.FloatTensor)

logger.info('[SYS] Model loaded')
log_sys_info()
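
if __name__ == '__main__':
    # Quick smoke test: a minimal sketch assuming the upstream ChatGLM
    # `chat` helper (loaded via trust_remote_code) keeps its usual
    # (tokenizer, query, history) -> (response, history) signature
    response, history = model.chat(tokenizer, 'Hello!', history=[])
    logger.info(response)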