app: llm: avx2?
Browse files
app.py
CHANGED
@@ -3,9 +3,6 @@ import gradio as gr
|
|
3 |
#import time
|
4 |
from langchain.llms import CTransformers
|
5 |
|
6 |
-
#checkpoint = "bigscience/bloomz" # english
|
7 |
-
#checkpoint = "cmarkea/bloomz-3b-sft-chat"
|
8 |
-
#checkpoint = "bigscience/bloomz-7b1-mt" # non english
|
9 |
model_repo = os.getenv('HF_MODEL_REPO')
|
10 |
model_bin = os.getenv('HF_MODEL_BIN')
|
11 |
llm_config = {
|
@@ -15,9 +12,7 @@ llm_config = {
|
|
15 |
'repetition_penalty': 1.1,
|
16 |
}
|
17 |
|
18 |
-
|
19 |
-
# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
|
20 |
-
llm = CTransformers(model=model_repo, model_file=model_bin, config=llm_config)
|
21 |
|
22 |
def response(prompt):
|
23 |
txt = llm(prompt)
|
|
|
3 |
#import time
|
4 |
from langchain.llms import CTransformers
|
5 |
|
|
|
|
|
|
|
6 |
model_repo = os.getenv('HF_MODEL_REPO')
|
7 |
model_bin = os.getenv('HF_MODEL_BIN')
|
8 |
llm_config = {
|
|
|
12 |
'repetition_penalty': 1.1,
|
13 |
}
|
14 |
|
15 |
+
llm = CTransformers(model=model_repo, model_file=model_bin, config=llm_config, lib="avx2")
|
|
|
|
|
16 |
|
17 |
def response(prompt):
|
18 |
txt = llm(prompt)
|