app: llm: more config
app.py CHANGED
@@ -7,19 +7,21 @@ from langchain.llms import CTransformers
 #checkpoint = "cmarkea/bloomz-3b-sft-chat"
 #checkpoint = "bigscience/bloomz-7b1-mt" # non english
 #checkpoint = os.getenv('HF_BLOOM_MODEL')
-
-
-
-
-num_beams = 2
+llm_config = {
+    'max_new_tokens': 256,
+    'temperature': 0.8,
+    'top_p': 0.5,
+    'num_beams': 2,
+    'repetition_penalty': 1.1,
+}
 
-from ctransformers import AutoModelForCausalLM
 
 # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
-llm = CTransformers(model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF", model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf")
+llm = CTransformers(model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF", model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf", config=llm_config)
 
 def response(prompt):
-
+    txt = llm(prompt)
+    return txt
 
 if __name__ == '__main__':
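For context, a minimal sketch of the committed change in use, assuming langchain and ctransformers are installed and the GGUF weights can be fetched from the Hub. Note that ctransformers documents config keys such as max_new_tokens, temperature, top_p, and repetition_penalty; num_beams is not among its documented options and may be rejected, so it is left out of this sketch. The example prompt is illustrative only.

    # Sketch only: mirrors the committed config under the assumptions above.
    from langchain.llms import CTransformers

    llm_config = {
        'max_new_tokens': 256,      # upper bound on generated tokens
        'temperature': 0.8,         # sampling temperature
        'top_p': 0.5,               # nucleus-sampling cutoff
        'repetition_penalty': 1.1,  # discourage repeated tokens
    }

    llm = CTransformers(
        model="TheBloke/WizardLM-1.0-Uncensored-Llama2-13B-GGUF",
        model_file="wizardlm-1.0-uncensored-llama2-13b.Q4_0.gguf",
        config=llm_config,
    )

    def response(prompt):
        # LangChain LLMs are callable on a prompt string, as in the diff above.
        return llm(prompt)

    if __name__ == '__main__':
        print(response("Say hello in one short sentence."))  # illustrative prompt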