Oleg Shulyakov committed
Commit: 57256b4 · Parent(s): f3a3278
Update paths
app.py CHANGED
@@ -15,7 +15,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 # used for restarting the space
 SPACE_ID = os.environ.get("SPACE_ID")
 HF_TOKEN = os.environ.get("HF_TOKEN")
-CONVERSION_SCRIPT = "
+CONVERSION_SCRIPT = "/app/convert_hf_to_gguf.py"
 
 # escape HTML for logging
 def escape(s: str) -> str:
@@ -28,7 +28,7 @@ def escape(s: str) -> str:
 
 def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
     imatrix_command = [
-        "
+        "llama-imatrix",
         "-m", model_path,
         "-f", train_data_path,
         "-ngl", "99",
@@ -63,7 +63,7 @@ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token:
         raise ValueError("You have to be logged in.")
 
     split_cmd = [
-        "
+        "llama-gguf-split",
         "--split",
     ]
     if split_max_size:
@@ -185,7 +185,7 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         if train_data_file:
            train_data_path = train_data_file.name
         else:
-            train_data_path = "
+            train_data_path = "train_data.txt" #fallback calibration dataset
 
         print(f"Training data file path: {train_data_path}")
 
@@ -201,12 +201,12 @@ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_rep
         quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
         if use_imatrix:
             quantise_ggml = [
-                "
+                "llama-quantize",
                 "--imatrix", imatrix_path, fp16, quantized_gguf_path, imatrix_q_method
             ]
         else:
             quantise_ggml = [
-                "
+                "llama-quantize",
                 fp16, quantized_gguf_path, q_method
             ]
         result = subprocess.run(quantise_ggml, shell=False, capture_output=True)
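Every hunk makes the same kind of change: the llama.cpp tools are now invoked by bare executable name (llama-imatrix, llama-gguf-split, llama-quantize) rather than a repository-relative path, and the HF-to-GGUF conversion script is expected at /app/convert_hf_to_gguf.py. A minimal sketch of what this relies on, assuming the binaries were placed on the container's PATH by the Space's image (the helper functions below are illustrative and not part of app.py):

import shutil
import subprocess

# Illustrative only: the commit assumes these llama.cpp executables resolve on PATH.
REQUIRED_TOOLS = ["llama-imatrix", "llama-gguf-split", "llama-quantize"]

def check_tools() -> None:
    # shutil.which returns None when an executable cannot be found on PATH
    missing = [tool for tool in REQUIRED_TOOLS if shutil.which(tool) is None]
    if missing:
        raise RuntimeError(f"llama.cpp tools not found on PATH: {missing}")

def quantize(fp16_path: str, out_path: str, q_method: str) -> None:
    # Mirrors the shape of the non-imatrix quantize command built in process_model
    # (arguments here are placeholders)
    cmd = ["llama-quantize", fp16_path, out_path, q_method]
    result = subprocess.run(cmd, shell=False, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"llama-quantize failed: {result.stderr}")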