futranbg committed on
Commit
88417e5
·
1 Parent(s): cff4853

app: llm: avx2?

Browse files
Files changed (1) hide show
  1. app.py +1 -6
app.py CHANGED
@@ -3,9 +3,6 @@ import gradio as gr
3
  #import time
4
  from langchain.llms import CTransformers
5
 
6
- #checkpoint = "bigscience/bloomz" # english
7
- #checkpoint = "cmarkea/bloomz-3b-sft-chat"
8
- #checkpoint = "bigscience/bloomz-7b1-mt" # non english
9
  model_repo = os.getenv('HF_MODEL_REPO')
10
  model_bin = os.getenv('HF_MODEL_BIN')
11
  llm_config = {
@@ -15,9 +12,7 @@ llm_config = {
15
  'repetition_penalty': 1.1,
16
  }
17
 
18
-
19
- # Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
20
- llm = CTransformers(model=model_repo, model_file=model_bin, config=llm_config)
21
 
22
  def response(prompt):
23
  txt = llm(prompt)
 
3
  #import time
4
  from langchain.llms import CTransformers
5
 
 
 
 
6
  model_repo = os.getenv('HF_MODEL_REPO')
7
  model_bin = os.getenv('HF_MODEL_BIN')
8
  llm_config = {
 
12
  'repetition_penalty': 1.1,
13
  }
14
 
15
+ llm = CTransformers(model=model_repo, model_file=model_bin, config=llm_config, lib="avx2")
 
 
16
 
17
  def response(prompt):
18
  txt = llm(prompt)