futranbg committed
Commit 06342ef · 1 Parent(s): 146dda3

app: llm: more config

Files changed (1)
  1. app.py +10 -8
app.py CHANGED
@@ -7,19 +7,21 @@ from langchain.llms import CTransformers
 #checkpoint = "cmarkea/bloomz-3b-sft-chat"
 #checkpoint = "bigscience/bloomz-7b1-mt" # non english
 #checkpoint = os.getenv('HF_BLOOM_MODEL')
-max_new_tokens = 128
-temperature = 0.8
-top_p = 0.5
-repetition_penalty = 1
-num_beams = 2
+llm_config = {
+    'max_new_tokens': 256,
+    'temperature': 0.8,
+    'top_p': 0.5,
+    'num_beams': 2,
+    'repetition_penalty': 1.1,
+}
 
-from ctransformers import AutoModelForCausalLM
 
 # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
-llm = CTransformers(model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF", model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf")
+llm = CTransformers(model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF", model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf", config=llm_config)
 
 def response(prompt):
-    return llm(prompt)
+    txt = llm(prompt)
+    return txt
 
 if __name__ == '__main__':
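For reference, a minimal standalone sketch of the pattern this commit adopts: a plain dict passed as config= to LangChain's CTransformers wrapper, which forwards it to the underlying ctransformers loader. The model name, file, and config values are taken from the diff above; the prompt string is invented, and 'num_beams' is left out here because it is not among the generation parameters ctransformers documents, so whether it would take effect is uncertain.

# Sketch only, not part of the commit. Assumes langchain and ctransformers
# are installed and the GGUF file can be fetched from the Hugging Face Hub.
from langchain.llms import CTransformers

llm_config = {
    'max_new_tokens': 256,      # upper bound on tokens generated per call
    'temperature': 0.8,         # sampling temperature; higher = more random
    'top_p': 0.5,               # nucleus sampling: sample from top 50% probability mass
    'repetition_penalty': 1.1,  # values > 1.0 penalize repeating recent tokens
}

llm = CTransformers(
    model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF",
    model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf",
    config=llm_config,
)

# LangChain LLM objects of this vintage are callable: prompt in, string out.
print(llm("Explain GGUF quantization in one sentence."))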