import torch
from session import logger, log_sys_info
from transformers import AutoTokenizer, AutoModel

chatglm = 'THUDM/chatglm-6b'
chatglm_rev = '4de8efe'
int8_model = 'KumaTea/twitter-int8'
int8_model_rev = '1136001'

# Alternative: clone the model repo manually instead of letting
# transformers download it.
# import subprocess
# result = subprocess.run(
#     ['git', 'clone', 'https://huggingface.co/KumaTea/twitter-int8', 'model'],
#     capture_output=True, text=True
# )
# print(result.stdout)

# Force CPU execution by monkey-patching torch.cuda:
# device = torch.device('cpu')
# torch.cuda.current_device = lambda: device

log_sys_info()

# We have to use full precision (float32), as some token ids are > 65535
# and fall outside the representable range of fp16.
model = AutoModel.from_pretrained(
    int8_model,
    trust_remote_code=True,
    revision=int8_model_rev
).float()  # .to(device)
tokenizer = AutoTokenizer.from_pretrained(
    chatglm,
    trust_remote_code=True,
    revision=chatglm_rev
)

# dump a log to ensure everything works well
# print(model.peft_config)

model.eval()
# print(model)

torch.set_default_tensor_type(torch.FloatTensor)

logger.info('[SYS] Model loaded')
log_sys_info()
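
# Optional smoke test (a minimal sketch, kept disabled like the other
# diagnostics above): ChatGLM's remote modeling code exposes a
# chat(tokenizer, query, history=...) helper returning (response, history).
# Uncomment to confirm the full-precision CPU model actually generates text
# after loading; note that CPU inference is slow, so this is best left off
# outside of debugging.
# response, history = model.chat(tokenizer, 'Hello!', history=[])
# logger.info(f'[SYS] Smoke test response: {response}')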