Add model

- acip_model.py +44 -24
- config.json +6 -6
- parametrized_model.py +3 -3
acip_model.py
CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import Any
 
 import torch
@@ -5,6 +6,8 @@ from transformers import PreTrainedModel
 
 from .parametrized_model import ParametrizedModel, ParametrizedModelConfig
 
+logger = logging.getLogger(__name__)
+
 
 class ACIPModelConfig(ParametrizedModelConfig):
     """
@@ -24,7 +27,7 @@ class ACIPModel(ParametrizedModel):
     It manages a `score_map` that stores the scores of the parametrized modules' target parameters,
     which are updated during tuning by the ACIP method.
     Moreover, it provides `prune_model_by_score` that prunes the target parameters of the model according to
-    their scores to achieve any given
+    their scores to achieve any given size ratio.
 
     Notes: The `score_map` is managed in float32 internally because a lower precision may lead to unexpected numerical
         inaccuracies in the resulting parameter ranking. Fortunately, the memory consumption is negligible compared to
@@ -92,10 +95,10 @@ class ACIPModel(ParametrizedModel):
            buffer.copy_(score.detach().float())
            self._score_map[p_name] = buffer
 
-    def
+    def _predict_size_ratio_by_score(self, k: int, full: bool = False) -> tuple[float, dict[str, torch.Tensor]]:
        """
        Helper function that checks what would happen if the k smallest target parameters are pruned
-        according to the global score map ranking. It returns the resulting
+        according to the global score map ranking. It returns the resulting size ratio
        and the corresponding parameter masks.
 
        Args:
@@ -103,7 +106,7 @@ class ACIPModel(ParametrizedModel):
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
 
-        Returns: Tuple of
+        Returns: Tuple of size ratio and parameter masks. The masks indicate which parameters to keep.
        """
        # Find the threshold value for the k smallest entries according to the global score map ranking.
        score_map_cat = torch.cat([param.flatten() for param in self.score_map.values()])
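
To make the ranking step concrete, here is a minimal, self-contained sketch of what `_predict_size_ratio_by_score` computes, assuming only that `score_map` maps parameter names to float32 score tensors. The standalone function name is hypothetical, and counting mask entries is a stand-in for `ParametrizedModel.get_size_ratio`, which counts actual model parameters:

import torch

def predict_size_ratio(score_map: dict[str, torch.Tensor], k: int) -> tuple[float, dict[str, torch.Tensor]]:
    # The k-th smallest score across all target parameters is the global pruning threshold.
    scores_cat = torch.cat([s.flatten() for s in score_map.values()])
    threshold = scores_cat.kthvalue(k).values
    # Keep exactly the parameters whose score lies above the threshold.
    masks = {name: (s > threshold).to(dtype=s.dtype) for name, s in score_map.items()}
    # Hypothetical size ratio: kept entries relative to total entries.
    kept = sum(int(m.sum().item()) for m in masks.values())
    return kept / scores_cat.numel(), masks
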
@@ -114,55 +117,72 @@ class ACIPModel(ParametrizedModel):
        for p_name, score in self.score_map.items():
            param_masks[p_name] = (score > threshold).to(dtype=score.dtype)
 
-        # Compute hypothetical
-        return
+        # Compute hypothetical size ratio if param_masks would be used as masks for the target parameters.
+        size_ratio = self.get_size_ratio(full=full, target_params=param_masks)
+        return size_ratio, param_masks
 
-    def _get_param_masks(self,
+    def _get_param_masks(self, size_ratio: float, full: bool = False) -> dict[str, torch.Tensor]:
        """
-        Helper function that determines which parameters to keep to reach a target
-        Instead of looping over `k ->
-        the
+        Helper function that determines which parameters to keep to reach a target size ratio.
+        Instead of looping over `k -> _predict_size_ratio_by_score(k)`, a binary search can be used because
+        the size ratio is monotonically decreasing in k.
 
        Args:
+            size_ratio: Target size ratio.
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
 
-        Returns: Parameter masks indicating which parameters to keep to reach the target
+        Returns: Parameter masks indicating which parameters to keep to reach the target size ratio.
        """
-        if
+        if size_ratio == 1.0:
            return {p_name: torch.ones_like(score) for p_name, score in self.score_map.items()}
 
-        # Perform a binary search to find the smallest k such that the
+        # Perform a binary search to find the largest k such that the size ratio stays above size_ratio.
        # Here, k_lo and k_hi are the lower and upper bound of the search interval.
        k_lo, k_hi = 1, sum(score.numel() for score in self.score_map.values())
        while k_lo < k_hi:
            k_mid = (k_lo + k_hi + 1) // 2  # round up so that k_mid > k_lo and the loop always makes progress
-            ratio, _ = self.
-            if ratio >
+            ratio, _ = self._predict_size_ratio_by_score(k=k_mid, full=full)
+            if ratio > size_ratio:
                k_lo = k_mid
            else:
                k_hi = k_mid - 1
        k = k_lo
        # TODO: handle tie-breaks
-        return self.
+        return self._predict_size_ratio_by_score(k=k, full=full)[1]
 
-    def prune_model_by_score(
+    def prune_model_by_score(
+        self,
+        size_ratio: float | None = None,
+        compression_rate: float | None = None,
+        full: bool = False,
+    ) -> None:
        """
        This method prunes the target parameters of the model according to their scores to achieve
-        a given
+        a given size ratio.
 
        This can be efficiently implemented by a simple binary search strategy:
        We find the largest number of parameters to be pruned according to the score map ranking
-        such that the resulting
+        such that the resulting size ratio is still at least the target `size_ratio`.
 
        Args:
+            size_ratio: The target size ratio, which is the ratio between the size of the compressed model and
+                the original model (where size is measured in number of parameters).
+                If not provided, `compression_rate` must be provided.
+            compression_rate: This is a convenience parameter that allows you to set the target compression rate
+                instead of `size_ratio`. It is equivalent to `size_ratio = 1.0 - compression_rate`.
+                If both `size_ratio` and `compression_rate` are provided, `size_ratio` is used.
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
        """
+        if size_ratio is None and compression_rate is None:
+            raise ValueError("Either `size_ratio` or `compression_rate` must be provided.")
+        elif size_ratio is None and compression_rate is not None:
+            size_ratio = 1.0 - compression_rate
+        elif compression_rate is not None:
+            logger.warning("Both `size_ratio` and `compression_rate` are provided. Using `size_ratio`.")
+
+        param_masks = self._get_param_masks(size_ratio=size_ratio, full=full)
 
        # Reset the target parameters according to the parameter masks
        for p_name, param in self.get_target_params().items():
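
With these pieces in place, pruning to a target size is a single call. A hypothetical usage sketch (how an `ACIPModel` is instantiated depends on the surrounding codebase and is elided here):

model: ACIPModel = ...

# Keep roughly 40% of the parameters, ranked by their scores: for a
# 7B-parameter model this targets about 2.8B remaining parameters.
model.prune_model_by_score(size_ratio=0.4)

# The same target, expressed via the convenience parameter: 1.0 - 0.6 = 0.4.
model.prune_model_by_score(compression_rate=0.6)
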
config.json
CHANGED
@@ -9,8 +9,8 @@
   "bias": "none",
   "eva_config": null,
   "exclude_modules": [
-    "base",
     "parametrization",
+    "base",
     "ortho"
   ],
   "fan_in_fan_out": false,
@@ -32,14 +32,14 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "
-    "gate_proj",
+    "q_proj",
     "k_proj",
     "v_proj",
-    "
-    "q_proj",
+    "ortho",
     "base",
-    "
+    "gate_proj",
+    "o_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
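
The fields of this config (`target_modules`, `exclude_modules`, `use_dora`, `eva_config`) match a PEFT LoRA adapter config, and PEFT matches module names against these lists regardless of order, so the reordering above should not change which modules are adapted. A sketch of an equivalent config, assuming a PEFT version recent enough to support `exclude_modules`:

from peft import LoraConfig

# Values copied from the diff. Note that "base" and "ortho" appear in both
# lists; the exclusion list is presumably what keeps them from being wrapped.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["up_proj", "q_proj", "k_proj", "v_proj", "ortho", "base", "gate_proj", "o_proj", "down_proj"],
    exclude_modules=["parametrization", "base", "ortho"],
)
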
parametrized_model.py
CHANGED
@@ -353,7 +353,7 @@ class ParametrizedModel(PreTrainedModel):
     The corresponding modules are accessed via `parametrized_modules`, `adapter_modules`,
     and `quantized_modules`, respectively.
     The class also provides several convenience methods to manage the parametrization: `get_target_params`,
-    `get_num_params`, `
+    `get_num_params`, `get_size_ratio`, `reset_target_params`, `compress`.
 
     Standard functionality (`forward`, `generate`, `save_pretrained`, `from_pretrained`) is essentially forwarded
     to the wrapped model.
@@ -698,9 +698,9 @@ class ParametrizedModel(PreTrainedModel):
            num_params = 1e-6
        return num_params
 
-    def
+    def get_size_ratio(self, full: bool = False, target_params: dict[str, torch.Tensor] | None = None) -> float:
        """
-        Convenience function to compute the
+        Convenience function to compute the size ratio of the present model.
 
        See Also:
            `get_num_params`
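
Taken together with `get_num_params`, the size ratio is simply a quotient of parameter counts, and the `num_params = 1e-6` fallback above presumably guards the denominator against division by zero. A minimal sketch of the semantics suggested by the docstrings; the `get_num_params` keyword arguments used here (`target_params`, `original`) are assumptions, not the repo's actual signature:

def get_size_ratio(model, full: bool = False, target_params=None) -> float:
    # Parameter count of the (hypothetically masked) compressed model ...
    compressed = model.get_num_params(full=full, target_params=target_params)
    # ... relative to the parameter count of the original model.
    original = model.get_num_params(full=full, original=True)
    return compressed / original
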
|