Sean-Case committed
Commit · 0a7a8db
1 Parent(s): 8249fd3
Build fails when gpu_layers > 0, so set it to 0 at start and modify it in the app.
Browse files
- app.py +4 -4
- chatfuncs/chatfuncs.py +2 -2
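The fix relies on a config object whose gpu_layers attribute starts at 0 (so the Space can build and start without touching a GPU) and is only raised later, from the app, via update_gpu(). The class below is a minimal sketch for orientation, not the repo's actual definition: the name CtransRunConfig and the defaults are placeholders, and only fields visible in this diff (gpu_layers, context_length, batch_size, stream, threads) are included.

class CtransRunConfig:  # hypothetical stand-in for the gpu_config / cpu_config objects
    def __init__(self, gpu_layers=0, context_length=4096, threads=8, batch_size=256, stream=True):
        self.gpu_layers = gpu_layers          # 0 at startup so nothing is offloaded to the GPU
        self.context_length = context_length
        self.threads = threads                # illustrative default
        self.batch_size = batch_size
        self.stream = stream

    def update_gpu(self, new_value):
        # called from load_model() in app.py with the layer count chosen in the UI
        self.gpu_layers = new_value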
app.py
CHANGED
@@ -81,7 +81,7 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
 
     if model_type == "Mistral Open Orca (larger, slow)":
         if torch_device == "cuda":
-            gpu_config.update_gpu(
+            gpu_config.update_gpu(gpu_layers)
         else:
             gpu_config.update_gpu(gpu_layers)
             cpu_config.update_gpu(gpu_layers)
@@ -94,12 +94,12 @@ def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_d
         try:
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
-            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='
+            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(gpu_config)) # **asdict(CtransRunConfig_cpu())
 
         except:
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Orca-Mini-3B-gguf', model_type='llama', model_file='q5_0-orca-mini-3b.gguf', **vars(cpu_config)) #**asdict(CtransRunConfig_gpu())
             #model = AutoModelForCausalLM.from_pretrained('Aryanne/Wizard-Orca-3B-gguf', model_type='llama', model_file='q4_1-wizard-orca-3b.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
-            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='
+            model = AutoModelForCausalLM.from_pretrained('TheBloke/Mistral-7B-OpenOrca-GGUF', model_type='mistral', model_file='mistral-7b-openorca.Q4_K_M.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
 
         tokenizer = []
 
@@ -233,7 +233,7 @@ with block:
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca (small, fast)", choices = ["Flan Alpaca (small, fast)", "Mistral Open Orca (larger, slow)"])
         with gr.Row():
-            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=
+            gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (WARNING: please don't modify unless you have a GPU).", value=0, minimum=0, maximum=20, step = 1, visible=True)
             change_model_button = gr.Button(value="Load model", scale=0)
         load_text = gr.Text(label="Load status")
 
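This commit only adjusts the slider and the loader; the event wiring that connects them is outside the hunks shown. A plausible sketch of how the pieces fit together (the .click() call below is an assumption, not part of this diff): the "Load model" button passes the slider value into load_model, which then calls gpu_config.update_gpu(gpu_layers) as in the first hunk above.

# Hypothetical Gradio wiring, for orientation only
change_model_button.click(fn=load_model,
                          inputs=[model_choice, gpu_layer_choice],
                          outputs=load_text)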
chatfuncs/chatfuncs.py
CHANGED
@@ -69,7 +69,7 @@ kw_model = pipeline("feature-extraction", model="sentence-transformers/all-MiniL
 
 if torch.cuda.is_available():
     torch_device = "cuda"
-    gpu_layers =
+    gpu_layers = 0
 else:
     torch_device = "cpu"
     gpu_layers = 0
@@ -92,7 +92,7 @@ reset: bool = False
     stream: bool = True
     threads: int = threads
     batch_size:int = 256
-    context_length:int =
+    context_length:int = 4096
     sample = True
 
 
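Taken together, the two files mean the ctransformers model is always constructed with gpu_layers=0 unless the app explicitly raises it. Spelled out as plain keyword arguments (the app instead passes them via **vars(gpu_config)), the GPU-path call from app.py is roughly the sketch below; all values shown are the ones visible in this diff.

from ctransformers import AutoModelForCausalLM

# gpu_layers stays 0 unless the user moves the slider in the "Advanced features"
# tab and reloads the model, which routes the new value through update_gpu().
model = AutoModelForCausalLM.from_pretrained(
    'TheBloke/Mistral-7B-OpenOrca-GGUF',
    model_type='mistral',
    model_file='mistral-7b-openorca.Q4_K_M.gguf',
    gpu_layers=0,
    context_length=4096,
    batch_size=256,
    stream=True)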