app: llm: more config
app.py CHANGED
@@ -7,19 +7,21 @@ from langchain.llms import CTransformers
 #checkpoint = "cmarkea/bloomz-3b-sft-chat"
 #checkpoint = "bigscience/bloomz-7b1-mt" # non english
 #checkpoint = os.getenv('HF_BLOOM_MODEL')
-
-
-
-
-num_beams = 2
+llm_config = {
+    'max_new_tokens': 256,
+    'temperature': 0.8,
+    'top_p': 0.5,
+    'num_beams': 2,
+    'repetition_penalty': 1.1,
+}
 
-from ctransformers import AutoModelForCausalLM
 
 # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
-llm = CTransformers(model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF", model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf")
+llm = CTransformers(model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF", model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf", config=llm_config)
 
 def response(prompt):
-
+    txt = llm(prompt)
+    return txt
 
 if __name__ == '__main__':
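For context, a minimal sketch of the committed change in use, assuming langchain and ctransformers are installed and the GGUF weights can be fetched from the Hub. Note that ctransformers documents config keys such as max_new_tokens, temperature, top_p, and repetition_penalty; num_beams is not among its documented options and may be rejected, so it is left out of this sketch. The example prompt is illustrative only.

    # Sketch only: mirrors the committed config under the assumptions above.
    from langchain.llms import CTransformers

    llm_config = {
        'max_new_tokens': 256,      # upper bound on generated tokens
        'temperature': 0.8,         # sampling temperature
        'top_p': 0.5,               # nucleus-sampling cutoff
        'repetition_penalty': 1.1,  # discourage repeated tokens
    }

    llm = CTransformers(
        model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF",
        model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf",
        config=llm_config,
    )

    def response(prompt):
        # LangChain LLMs are callable on a prompt string, as in the diff above.
        return llm(prompt)

    if __name__ == '__main__':
        print(response("Say hello in one short sentence."))  # illustrative prompt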