import torch

# Choose the execution device once, at import time.
# gpu_layers is 100 when CUDA is usable and 0 otherwise; it is the default
# value of n_gpu_layers in CtransInitConfig_gpu.
if torch.cuda.is_available():
    torch_device = "cuda"
    gpu_layers = 100
else:
    torch_device = "cpu"
    gpu_layers = 0

print("Running on device:", torch_device)

# Fixed thread count; it is the default value of n_threads in
# CtransInitConfig_gpu.
threads = 8
print("CPU threads:", threads)
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | temperature: float = 0.1 | 
					
						
						|  | top_k: int = 3 | 
					
						
						|  | top_p: float = 1 | 
					
						
						|  | repetition_penalty: float = 1.15 | 
					
						
						|  | flan_alpaca_repetition_penalty: float = 1.3 | 
					
						
						|  | last_n_tokens: int = 64 | 
					
						
						|  | max_new_tokens: int = 1024 | 
					
						
						|  | seed: int = 42 | 
					
						
						|  | reset: bool = False | 
					
						
						|  | stream: bool = True | 
					
						
						|  | threads: int = threads | 
					
						
						|  | batch_size:int = 256 | 
					
						
						|  | context_length:int = 2048 | 
					
						
						|  | sample = True | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | max_tokens = 4096 | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class CtransInitConfig_gpu: | 
					
						
						|  | def __init__(self, | 
					
						
						|  | last_n_tokens=last_n_tokens, | 
					
						
						|  | seed=seed, | 
					
						
						|  | n_threads=threads, | 
					
						
						|  | n_batch=batch_size, | 
					
						
						|  | n_ctx=max_tokens, | 
					
						
						|  | n_gpu_layers=gpu_layers): | 
					
						
						|  |  | 
					
						
						|  | self.last_n_tokens = last_n_tokens | 
					
						
						|  | self.seed = seed | 
					
						
						|  | self.n_threads = n_threads | 
					
						
						|  | self.n_batch = n_batch | 
					
						
						|  | self.n_ctx = n_ctx | 
					
						
						|  | self.n_gpu_layers = n_gpu_layers | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | def update_gpu(self, new_value): | 
					
						
						|  | self.n_gpu_layers = new_value | 
					
						
						|  |  | 
					
						
						|  | class CtransInitConfig_cpu(CtransInitConfig_gpu): | 
					
						
						|  | def __init__(self): | 
					
						
						|  | super().__init__() | 
					
						
						|  | self.n_gpu_layers = 0 | 
					
						
						|  |  | 
					
						
						|  | gpu_config = CtransInitConfig_gpu() | 
					
						
						|  | cpu_config = CtransInitConfig_cpu() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | class CtransGenGenerationConfig: | 
					
						
						|  | def __init__(self, temperature=temperature, | 
					
						
						|  | top_k=top_k, | 
					
						
						|  | top_p=top_p, | 
					
						
						|  | repeat_penalty=repetition_penalty, | 
					
						
						|  | seed=seed, | 
					
						
						|  | stream=stream, | 
					
						
						|  | max_tokens=max_new_tokens | 
					
						
						|  | ): | 
					
						
						|  | self.temperature = temperature | 
					
						
						|  | self.top_k = top_k | 
					
						
						|  | self.top_p = top_p | 
					
						
						|  | self.repeat_penalty = repeat_penalty | 
					
						
						|  | self.seed = seed | 
					
						
						|  | self.max_tokens=max_tokens | 
					
						
						|  | self.stream = stream | 
					
						
						|  |  | 
					
						
						|  | def update_temp(self, new_value): | 
					
						
						|  | self.temperature = new_value |