Spaces:
Running
Running
MekkCyber
committed on
Commit
·
a24abf2
1
Parent(s):
fe44b10
fixes
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ from torchao.quantization import (
|
|
| 18 |
|
| 19 |
MAP_QUANT_TYPE_TO_NAME = {
|
| 20 |
"Int4WeightOnly": "int4wo",
|
| 21 |
-
"GemliteUIntXWeightOnly": "intxwo-gemlite"
|
| 22 |
"Int8WeightOnly": "int8wo",
|
| 23 |
"Int8DynamicActivationInt8Weight": "int8da8w8",
|
| 24 |
"Float8WeightOnly": "float8wo",
|
|
@@ -61,14 +61,15 @@ def check_model_exists(
|
|
| 61 |
if (
|
| 62 |
quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
|
| 63 |
) and (group_size is not None):
|
| 64 |
-
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type
|
| 65 |
else:
|
| 66 |
-
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type
|
| 67 |
if repo_name in model_names:
|
| 68 |
return f"Model '{repo_name}' already exists in your repository."
|
| 69 |
else:
|
| 70 |
return None # Model does not exist
|
| 71 |
except Exception as e:
|
|
|
|
| 72 |
return f"Error checking model existence: {str(e)}"
|
| 73 |
|
| 74 |
|
|
@@ -237,9 +238,9 @@ def save_model(
|
|
| 237 |
if (
|
| 238 |
quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
|
| 239 |
) and (group_size is not None):
|
| 240 |
-
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type
|
| 241 |
else:
|
| 242 |
-
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type
|
| 243 |
progress(0.70, desc="Creating model card")
|
| 244 |
model_card = create_model_card(model_name, quantization_type, group_size)
|
| 245 |
with open(os.path.join(tmpdirname, "README.md"), "w") as f:
|
|
@@ -494,14 +495,14 @@ with gr.Blocks(css=css) as demo:
|
|
| 494 |
info="Select the Quantization method",
|
| 495 |
choices=[
|
| 496 |
"Int4WeightOnly",
|
| 497 |
-
"GemliteUIntXWeightOnly"
|
| 498 |
"Int8WeightOnly",
|
| 499 |
"Int8DynamicActivationInt8Weight",
|
| 500 |
"Float8WeightOnly",
|
| 501 |
"Float8DynamicActivationFloat8Weight",
|
| 502 |
"autoquant",
|
| 503 |
],
|
| 504 |
-
value="
|
| 505 |
filterable=False,
|
| 506 |
show_label=False,
|
| 507 |
)
|
|
@@ -509,7 +510,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 509 |
group_size = gr.Textbox(
|
| 510 |
info="Group Size (only for int4_weight_only and int8_weight_only)",
|
| 511 |
value="128",
|
| 512 |
-
interactive=(quantization_type.value == "
|
| 513 |
show_label=False,
|
| 514 |
)
|
| 515 |
|
|
|
|
| 18 |
|
| 19 |
MAP_QUANT_TYPE_TO_NAME = {
|
| 20 |
"Int4WeightOnly": "int4wo",
|
| 21 |
+
"GemliteUIntXWeightOnly": "intxwo-gemlite",
|
| 22 |
"Int8WeightOnly": "int8wo",
|
| 23 |
"Int8DynamicActivationInt8Weight": "int8da8w8",
|
| 24 |
"Float8WeightOnly": "float8wo",
|
|
|
|
| 61 |
if (
|
| 62 |
quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
|
| 63 |
) and (group_size is not None):
|
| 64 |
+
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}-gs{group_size}"
|
| 65 |
else:
|
| 66 |
+
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}"
|
| 67 |
if repo_name in model_names:
|
| 68 |
return f"Model '{repo_name}' already exists in your repository."
|
| 69 |
else:
|
| 70 |
return None # Model does not exist
|
| 71 |
except Exception as e:
|
| 72 |
+
# raise e
|
| 73 |
return f"Error checking model existence: {str(e)}"
|
| 74 |
|
| 75 |
|
|
|
|
| 238 |
if (
|
| 239 |
quantization_type in ["Int4WeightOnly", "GemliteUIntXWeightOnly"]
|
| 240 |
) and (group_size is not None):
|
| 241 |
+
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}-gs{group_size}"
|
| 242 |
else:
|
| 243 |
+
repo_name = f"{username}/{model_name.split('/')[-1]}-ao-{MAP_QUANT_TYPE_TO_NAME[quantization_type]}"
|
| 244 |
progress(0.70, desc="Creating model card")
|
| 245 |
model_card = create_model_card(model_name, quantization_type, group_size)
|
| 246 |
with open(os.path.join(tmpdirname, "README.md"), "w") as f:
|
|
|
|
| 495 |
info="Select the Quantization method",
|
| 496 |
choices=[
|
| 497 |
"Int4WeightOnly",
|
| 498 |
+
"GemliteUIntXWeightOnly",
|
| 499 |
"Int8WeightOnly",
|
| 500 |
"Int8DynamicActivationInt8Weight",
|
| 501 |
"Float8WeightOnly",
|
| 502 |
"Float8DynamicActivationFloat8Weight",
|
| 503 |
"autoquant",
|
| 504 |
],
|
| 505 |
+
value="Int8WeightOnly",
|
| 506 |
filterable=False,
|
| 507 |
show_label=False,
|
| 508 |
)
|
|
|
|
| 510 |
group_size = gr.Textbox(
|
| 511 |
info="Group Size (only for int4_weight_only and int8_weight_only)",
|
| 512 |
value="128",
|
| 513 |
+
interactive=(quantization_type.value == "Int4WeightOnly" or quantization_type.value == "Int8WeightOnly"),
|
| 514 |
show_label=False,
|
| 515 |
)
|
| 516 |
|