Spaces:

futranbg
/

chat-hf

Sleeping

futranbg committed on Sep 18, 2023

Commit

88417e5

1 Parent(s): cff4853

app: llm: avx2?

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,9 +3,6 @@ import gradio as gr
 #import time
 from langchain.llms import CTransformers
-#checkpoint = "bigscience/bloomz" # english
-#checkpoint = "cmarkea/bloomz-3b-sft-chat"
-#checkpoint = "bigscience/bloomz-7b1-mt" # non english
 model_repo = os.getenv('HF_MODEL_REPO')
 model_bin = os.getenv('HF_MODEL_BIN')
 llm_config = {
@@ -15,9 +12,7 @@ llm_config = {
           'repetition_penalty': 1.1,
           }
-# Set gpu_layers to the number of layers to offload to GPU. Set to 0 if no GPU acceleration is available on your system.
-llm = CTransformers(model=model_repo, model_file=model_bin, config=llm_config)
 def response(prompt):
     txt = llm(prompt)

 #import time
 from langchain.llms import CTransformers
 model_repo = os.getenv('HF_MODEL_REPO')
 model_bin = os.getenv('HF_MODEL_BIN')
 llm_config = {
           'repetition_penalty': 1.1,
           }
+llm = CTransformers(model=model_repo, model_file=model_bin, config=llm_config, lib="avx2")
 def response(prompt):
     txt = llm(prompt)