martingenzel committed
Commit 9de62de · verified · Parent: 1c74979
Files changed (3):
  1. acip_model.py +44 -24
  2. config.json +6 -6
  3. parametrized_model.py +3 -3
acip_model.py CHANGED
@@ -1,3 +1,4 @@
+import logging
 from typing import Any
 
 import torch
@@ -5,6 +6,8 @@ from transformers import PreTrainedModel
 
 from .parametrized_model import ParametrizedModel, ParametrizedModelConfig
 
+logger = logging.getLogger(__name__)
+
 
 class ACIPModelConfig(ParametrizedModelConfig):
     """
@@ -24,7 +27,7 @@ class ACIPModel(ParametrizedModel):
     It manages a `score_map` that stores the scores of the parametrized modules' target parameters,
     which are updated during tuning by the ACIP method.
     Moreover, it provides `prune_model_by_score` that prunes the target parameters of the model according to
-    their scores to achieve any given compression ratio.
+    their scores to achieve any given size ratio.
 
     Notes: The `score_map` is managed in float32 internally because a lower precision may lead to unexpected numerical
     inaccuracies in the resulting parameter ranking. Fortunately, the memory consumption is negligible compared to
@@ -92,10 +95,10 @@ class ACIPModel(ParametrizedModel):
             buffer.copy_(score.detach().float())
             self._score_map[p_name] = buffer
 
-    def _predict_compression_ratio_by_score(self, k: int, full: bool = False) -> tuple[float, dict[str, torch.Tensor]]:
+    def _predict_size_ratio_by_score(self, k: int, full: bool = False) -> tuple[float, dict[str, torch.Tensor]]:
         """
         Helper function that checks what would happen if the k smallest target parameters are pruned
-        according to the global score map ranking. It returns the resulting compression ratio
+        according to the global score map ranking. It returns the resulting size ratio
         and the corresponding parameter masks.
 
         Args:
@@ -103,7 +106,7 @@ class ACIPModel(ParametrizedModel):
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
 
-        Returns: Tuple of compression ratio and parameter masks. The masks indicate which parameters to keep.
+        Returns: Tuple of size ratio and parameter masks. The masks indicate which parameters to keep.
         """
         # Find the threshold value for the k smallest entries according to the global score map ranking.
         score_map_cat = torch.cat([param.flatten() for param in self.score_map.values()])
@@ -114,55 +117,72 @@ class ACIPModel(ParametrizedModel):
         for p_name, score in self.score_map.items():
             param_masks[p_name] = (score > threshold).to(dtype=score.dtype)
 
-        # Compute the hypothetical compression ratio if param_masks were used as masks for the target parameters.
-        compression_ratio = self.get_compression_ratio(full=full, target_params=param_masks)
-        return compression_ratio, param_masks
+        # Compute the hypothetical size ratio if param_masks were used as masks for the target parameters.
+        size_ratio = self.get_size_ratio(full=full, target_params=param_masks)
+        return size_ratio, param_masks
 
-    def _get_param_masks(self, compression_ratio: float, full: bool = False) -> dict[str, torch.Tensor]:
+    def _get_param_masks(self, size_ratio: float, full: bool = False) -> dict[str, torch.Tensor]:
         """
-        Helper function that determines which parameters to keep to reach a target compression ratio.
-        Instead of looping over `k -> _predict_compression_ratio_by_score(k)`, a binary search can be used because
-        the compression ratio is monotonically decreasing in k.
+        Helper function that determines which parameters to keep to reach a target size ratio.
+        Instead of looping over `k -> _predict_size_ratio_by_score(k)`, a binary search can be used because
+        the size ratio is monotonically decreasing in k.
 
         Args:
-            compression_ratio: Target compression ratio.
+            size_ratio: Target size ratio.
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
 
-        Returns: Parameter masks indicating which parameters to keep to reach the target compression ratio.
+        Returns: Parameter masks indicating which parameters to keep to reach the target size ratio.
         """
-        if compression_ratio == 1.0:
+        if size_ratio == 1.0:
            return {p_name: torch.ones_like(score) for p_name, score in self.score_map.items()}
 
-        # Perform a binary search to find the largest k such that the compression ratio is still at least compression_ratio.
+        # Perform a binary search to find the largest k such that the size ratio is still at least size_ratio.
        # Here, k_lo and k_hi are the lower and upper bound of the search interval.
        k_lo, k_hi = 1, sum(score.numel() for score in self.score_map.values())
        while k_lo < k_hi:
            k_mid = (k_lo + k_hi + 1) // 2  # round up so that k_lo < k_mid and the loop makes progress
-            ratio, _ = self._predict_compression_ratio_by_score(k=k_mid, full=full)
-            if ratio > compression_ratio:
+            ratio, _ = self._predict_size_ratio_by_score(k=k_mid, full=full)
+            if ratio > size_ratio:
                k_lo = k_mid
            else:
                k_hi = k_mid - 1
        k = k_lo
        # TODO: handle tie-breaks
-        return self._predict_compression_ratio_by_score(k=k, full=full)[1]
-
-    def prune_model_by_score(self, compression_ratio: float, full: bool = False) -> None:
+        return self._predict_size_ratio_by_score(k=k, full=full)[1]
+
+    def prune_model_by_score(
+        self,
+        size_ratio: float | None = None,
+        compression_rate: float | None = None,
+        full: bool = False,
+    ) -> None:
        """
        This method prunes the target parameters of the model according to their scores to achieve
-        a given compression ratio.
+        a given size ratio.
 
        This can be efficiently implemented by a simple binary search strategy:
        We find the largest number of parameters that can be pruned according to the score map ranking
-        such that the resulting compression ratio is still at least the target `compression_ratio`.
+        such that the resulting size ratio is still at least the target `size_ratio`.
 
        Args:
-            compression_ratio: The target compression ratio.
+            size_ratio: The target size ratio, which is the ratio between the size of the compressed model and
+                the original model (where size is measured in number of parameters).
+                If not provided, `compression_rate` must be provided.
+            compression_rate: This is a convenience parameter that allows you to set the target compression rate
+                instead of `size_ratio`. It is equivalent to `size_ratio = 1.0 - compression_rate`.
+                If both `size_ratio` and `compression_rate` are provided, `size_ratio` is used.
            full: Whether to count the number of parameters of the entire model or only the parametrized modules.
                See also `ParametrizedModel.get_num_params`.
        """
-        param_masks = self._get_param_masks(compression_ratio=compression_ratio, full=full)
+        if size_ratio is None and compression_rate is None:
+            raise ValueError("Either `size_ratio` or `compression_rate` must be provided.")
+        elif size_ratio is None:
+            size_ratio = 1.0 - compression_rate
+        elif compression_rate is not None:
+            logger.warning("Both `size_ratio` and `compression_rate` are provided. Using `size_ratio`.")
+
+        param_masks = self._get_param_masks(size_ratio=size_ratio, full=full)
 
        # Reset the target parameters according to the parameter masks
        for p_name, param in self.get_target_params().items():
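For reference, a minimal usage sketch of the API after this change. The `model` instance here is hypothetical; the two pruning calls are equivalent by the documented relation `size_ratio = 1.0 - compression_rate`:

    # Minimal sketch, assuming `model` is an already-instantiated ACIPModel.
    model.prune_model_by_score(size_ratio=0.4)          # keep 40% of the parameters, or:
    # model.prune_model_by_score(compression_rate=0.6)  # equivalent formulation
    print(model.get_size_ratio(full=False))             # inspect the achieved ratio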
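The binary search inside `_get_param_masks` is worth illustrating in isolation. Below is a self-contained toy sketch of the same strategy, not the repository's code: `kept_fraction` and `largest_prunable_k` are hypothetical stand-ins for `_predict_size_ratio_by_score` and the search loop, and the search returns the largest number of pruned entries whose kept fraction still exceeds the target.

    import torch

    def kept_fraction(scores: torch.Tensor, k: int) -> float:
        # Fraction of entries kept if the k smallest scores are pruned
        # (mirrors the threshold-and-mask logic of _predict_size_ratio_by_score).
        threshold = scores.flatten().kthvalue(k).values
        return (scores > threshold).float().mean().item()

    def largest_prunable_k(scores: torch.Tensor, size_ratio: float) -> int:
        # Binary search is valid because kept_fraction is monotonically decreasing in k.
        k_lo, k_hi = 1, scores.numel()
        while k_lo < k_hi:
            k_mid = (k_lo + k_hi + 1) // 2  # round up so that k_lo < k_mid
            if kept_fraction(scores, k_mid) > size_ratio:
                k_lo = k_mid  # still above the target, prune more
            else:
                k_hi = k_mid - 1  # overshot, prune less
        return k_lo

    scores = torch.rand(1000)
    k = largest_prunable_k(scores, size_ratio=0.4)
    print(k, kept_fraction(scores, k))  # ~599 pruned, kept fraction just above 0.4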
config.json CHANGED
@@ -9,8 +9,8 @@
   "bias": "none",
   "eva_config": null,
   "exclude_modules": [
-    "base",
     "parametrization",
+    "base",
     "ortho"
   ],
   "fan_in_fan_out": false,
@@ -32,14 +32,14 @@
   "revision": null,
   "target_modules": [
     "up_proj",
-    "ortho",
-    "gate_proj",
+    "q_proj",
     "k_proj",
     "v_proj",
-    "down_proj",
-    "q_proj",
+    "ortho",
     "base",
-    "o_proj"
+    "gate_proj",
+    "o_proj",
+    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
parametrized_model.py CHANGED
@@ -353,7 +353,7 @@ class ParametrizedModel(PreTrainedModel):
     The corresponding modules are accessed via `parametrized_modules`, `adapter_modules`,
     and `quantized_modules`, respectively.
     The class also provides several convenience methods to manage the parametrization: `get_target_params`,
-    `get_num_params`, `get_compression_ratio`, `reset_target_params`, `compress`.
+    `get_num_params`, `get_size_ratio`, `reset_target_params`, `compress`.
 
     Standard functionality (`forward`, `generate`, `save_pretrained`, `from_pretrained`) is essentially forwarded
     to the wrapped model.
@@ -698,9 +698,9 @@ class ParametrizedModel(PreTrainedModel):
             num_params = 1e-6
         return num_params
 
-    def get_compression_ratio(self, full: bool = False, target_params: dict[str, torch.Tensor] | None = None) -> float:
+    def get_size_ratio(self, full: bool = False, target_params: dict[str, torch.Tensor] | None = None) -> float:
         """
-        Convenience function to compute the compression ratio of the present model.
+        Convenience function to compute the size ratio of the present model.
 
         See Also:
             `get_num_params`
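To make the renamed quantity concrete, here is the relation between the two notions as defined in the `prune_model_by_score` docstring above (the parameter counts are illustrative, not taken from the repository):

    n_orig, n_comp = 7_000_000_000, 2_800_000_000  # made-up parameter counts
    size_ratio = n_comp / n_orig                   # 0.4: what get_size_ratio reports
    compression_rate = 1.0 - size_ratio            # 0.6: fraction of parameters removed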