# KumaGLM / model.py
import torch
from session import logger, log_sys_info
from transformers import AutoModel, AutoTokenizer
# Pinned revisions: base ChatGLM-6B (for the tokenizer) and the fine-tuned int8 checkpoint
CHATGLM = 'THUDM/chatglm-6b'
CHATGLM_REV = '4de8efe'
INT8_MODEL = 'KumaTea/twitter-int8'
INT8_MODEL_REV = '1136001'
# Earlier approach, kept for reference: clone the model repo manually
# and force CPU by monkey-patching torch.cuda.current_device:
# import subprocess
# subprocess.run(['git', 'clone', 'https://huggingface.co/KumaTea/twitter-int8', 'model'], capture_output=True, text=True)
# device = torch.device('cpu')
# torch.cuda.current_device = lambda: device
log_sys_info()  # record system state before loading the model
# Load in full (fp32) precision: some token ids exceed 65535,
# which half precision cannot represent exactly.
model = AutoModel.from_pretrained(
    INT8_MODEL,
    trust_remote_code=True,
    revision=INT8_MODEL_REV
).float()
tokenizer = AutoTokenizer.from_pretrained(CHATGLM, trust_remote_code=True, revision=CHATGLM_REV)
model.eval()  # inference only: disable dropout and other training-time behavior
torch.set_default_tensor_type(torch.FloatTensor)  # keep new tensors on CPU in fp32
logger.info('[SYS] Model loaded')
log_sys_info()
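

# A minimal smoke test, run only when this module is executed directly
# rather than imported. It assumes the checkpoint exposes ChatGLM's
# custom chat() helper (loaded via trust_remote_code); the prompt here
# is just a placeholder.
if __name__ == '__main__':
    response, history = model.chat(tokenizer, '你好', history=[])
    logger.info(f'[SYS] Self-test response: {response}')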