AttributeError: 'HybridCache' object has no attribute 'float'
#15, opened by naruto-soop
Hi, I've encountered an error during fine-tuning:
Traceback (most recent call last):
train_stats = trainer.train()
^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/trl/trainer/sft_trainer.py", line 361, in train
output = super().train(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 2250, in train
return inner_training_loop(
^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 2625, in _inner_training_loop
self._maybe_log_save_evaluate(
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 3098, in _maybe_log_save_evaluate
metrics = self._evaluate(trial, ignore_keys_for_eval)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 3052, in _evaluate
metrics = self.evaluate(ignore_keys=ignore_keys_for_eval)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 4118, in evaluate
output = eval_loop(
^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 4312, in evaluation_loop
losses, logits, labels = self.prediction_step(model, inputs, prediction_loss_only, ignore_keys=ignore_keys)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 4528, in prediction_step
loss, outputs = self.compute_loss(model, inputs, return_outputs=True)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/transformers/trainer.py", line 3772, in compute_loss
outputs = model(**inputs)
^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1736, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/torch/nn/modules/module.py", line 1747, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/accelerate/utils/operations.py", line 823, in forward
return model_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/accelerate/utils/operations.py", line 811, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/accelerate/utils/operations.py", line 790, in convert_to_fp32
return recursively_apply(_convert_to_fp32, tensor, test_type=_is_fp16_bf16_tensor)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/accelerate/utils/operations.py", line 119, in recursively_apply
{
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/accelerate/utils/operations.py", line 120, in <dictcomp>
k: recursively_apply(
^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/accelerate/utils/operations.py", line 127, in recursively_apply
return func(data, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/anaconda3/envs/ME/lib/python3.11/site-packages/accelerate/utils/operations.py", line 782, in _convert_to_fp32
return tensor.float()
AttributeError: 'HybridCache' object has no attribute 'float'
This error didn't appear with the Gemma3-1B model.
I suspect the issue is related to the Transformers version I am using (transformers 4.50.0.dev0).
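Gemma3-1B is loaded as Gemma3ForCausalLM (text only).
Gemma3-4B and larger are loaded as Gemma3ForConditionalGeneration (text and image), which leads to issues like this: as the traceback shows, during the evaluation step Accelerate's convert_to_fp32 walks the model output and calls .float() on a HybridCache object it finds there, and HybridCache has no such method.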