OpenGVLab/InternVL3-78B-hf · AttributeError: Qwen2TokenizerFast has no attribute video_token

You have video processor config saved in `preprocessor.json` file which is deprecated. Video processor configs should be saved in their own `video_preprocessor.json` file. You can rename the file or load and save the processor back which renames it automatically. Loading from `preprocessor.json` will be removed in v5.0.

AttributeError Traceback (most recent call last)
Cell In[5], line 6
4 torch_device = "cuda"
5 model_checkpoint = "/media/cfs/transaction-ctr-offline/InternVL/InternVL3-78B-hf"
----> 6 processor = AutoProcessor.from_pretrained(model_checkpoint)
7 model = AutoModelForImageTextToText.from_pretrained(model_checkpoint, device_map=torch_device, torch_dtype=torch.bfloat16)

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/models/auto/processing_auto.py:376, in AutoProcessor.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
372 return processor_class.from_pretrained(
373 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
374 )
375 elif processor_class is not None:
--> 376 return processor_class.from_pretrained(
377 pretrained_model_name_or_path, trust_remote_code=trust_remote_code, **kwargs
378 )
379 # Last try: we use the PROCESSOR_MAPPING.
380 elif type(config) in PROCESSOR_MAPPING:

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/processing_utils.py:1187, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, **kwargs)
1185 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
1186 processor_dict, kwargs = cls.get_processor_dict(pretrained_model_name_or_path, **kwargs)
-> 1187 return cls.from_args_and_dict(args, processor_dict, **kwargs)

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/processing_utils.py:982, in ProcessorMixin.from_args_and_dict(cls, args, processor_dict, **kwargs)
979 del processor_dict["auto_map"]
981 unused_kwargs = cls.validate_init_kwargs(processor_config=processor_dict, valid_kwargs=cls.valid_kwargs)
--> 982 processor = cls(*args, **processor_dict)
984 # Update processor with kwargs if needed
985 for key in set(kwargs.keys()):

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/models/internvl/processing_internvl.py:98, in InternVLProcessor.__init__(self, image_processor, tokenizer, video_processor, image_seq_length, chat_template, **kwargs)
96 self.end_image_token = tokenizer.end_image_token
97 self.image_token = tokenizer.context_image_token
---> 98 self.video_token = tokenizer.video_token
99 self.image_token_id = tokenizer.context_image_token_id
101 super().__init__(image_processor, tokenizer, video_processor, chat_template=chat_template, **kwargs)

File /media/cfs/mayulin.liam/.pylib/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1111, in SpecialTokensMixin.__getattr__(self, key)
1108 return self.convert_tokens_to_ids(attr_as_tokens) if attr_as_tokens is not None else None
1110 if key not in self.__dict__:
-> 1111 raise AttributeError(f"{self.__class__.__name__} has no attribute {key}")
1112 else:
1113 return super().__getattr__(key)

AttributeError: Qwen2TokenizerFast has no attribute video_token