AttributeError: Qwen2TokenizerFast has no attribute video_token

#3
by Merouin - opened

You have video processor config saved in preprocessor.json file which is deprecated. Video processor configs should be saved in their own video_preprocessor.json file. You can rename the file or load and save the processor back which renames it automatically. Loading from preprocessor.json will be removed in v5.0.

AttributeError Traceback (most recent call last)
Cell In[5], line 6
4 torch_device = "cuda"
5 model_checkpoint = "/media/cfs/transaction-ctr-offline/InternVL/InternVL3-78B-hf"
----> 6 processor = AutoProcessor.from_pretrained(model_checkpoint)
7 model = AutoModelForImageTextToText.from_pretrained(model_checkpoint, device_map=torch_device, torch_dtype=torch.bfloat16)

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py:376, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
372 return processor_class.from_pretrained(
373 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
374 )
375 elif processor_class is not None:
--> 376 return processor_class.from_pretrained(
377 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
378 )
379 # Last try: we use the PROCESSOR_MAPPING.
380 elif type(config) in PROCESSOR_MAPPING:

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/processing_utils.py:1187, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1185 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
1186 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
-> 1187 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/processing_utils.py:982, in ProcessorMixin.from_args_and_dict(cls, args, processor_dict, **kwargs)
979 del processor_dict["auto_map"]
981 unused_kwargs = cls.validate_init_kwargs(processor_config=processor_dict, valid_kwargs=cls.valid_kwargs)
--> 982 processor = cls(*args, **processor_dict)
984 # Update processor with kwargs if needed
985 for key in set(kwargs.keys()):

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/models/internvl/processing_internvl.py:98, in InternVLProcessor.__init__(self, image_processor, tokenizer, video_processor, image_seq_length, chat_template, **kwargs)
96 self.end_image_token = tokenizer.end_image_token
97 self.image_token = tokenizer.context_image_token
---> 98 self.video_token = tokenizer.video_token
99 self.image_token_id = tokenizer.context_image_token_id
101 super().__init__(image_processor, tokenizer, video_processor, chat_template=chat_template, **kwargs)

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1111, in SpecialTokensMixin.__getattr__(self, key)
1108 return self.convert_tokens_to_ids(attr_as_tokens) if attr_as_tokens is not None else None
1110 if key not in self.__dict__:
-> 1111 raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
1112 else:
1113 return super().__getattr__(key)

AttributeError: Qwen2TokenizerFast has no attribute video_token

Sign up or log in to comment