# KumaGLM / model.py
import torch
from session import logger, log_sys_info
from transformers import AutoModel, AutoTokenizer
# Pinned revisions: base ChatGLM-6B (for the tokenizer) and the fine-tuned int8 checkpoint
CHATGLM = 'THUDM/chatglm-6b'
CHATGLM_REV = '4de8efe'
INT8_MODEL = 'KumaTea/twitter-int8'
INT8_MODEL_REV = '1136001'
# Earlier approach, kept for reference: clone the model repo manually
# and force CPU by monkey-patching torch.cuda.current_device:
# import subprocess
# subprocess.run(['git', 'clone', 'https://huggingface.co/KumaTea/twitter-int8', 'model'], capture_output=True, text=True)
# device = torch.device('cpu')
# torch.cuda.current_device = lambda: device
log_sys_info()  # record system state before loading the model
# Load in full (fp32) precision: some token ids exceed 65535,
# which half precision cannot represent exactly.
model = AutoModel.from_pretrained(
    INT8_MODEL,
    trust_remote_code=True,
    revision=INT8_MODEL_REV
).float()
tokenizer = AutoTokenizer.from_pretrained(CHATGLM, trust_remote_code=True, revision=CHATGLM_REV)
model.eval()  # inference only: disable dropout and other training-time behavior
torch.set_default_tensor_type(torch.FloatTensor)  # keep new tensors on CPU in fp32
logger.info('[SYS] Model loaded')
log_sys_info()
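

# A minimal smoke test, run only when this module is executed directly
# rather than imported. It assumes the checkpoint exposes ChatGLM's
# custom chat() helper (loaded via trust_remote_code); the prompt here
# is just a placeholder.
if __name__ == '__main__':
    response, history = model.chat(tokenizer, '你好', history=[])
    logger.info(f'[SYS] Self-test response: {response}')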