|
from typing import Any |
|
|
|
from transformers import AutoImageProcessor, AutoProcessor, AutoTokenizer, ProcessorMixin |
|
|
|
from .configuration_vlm import VLMConfig |
|
|
|
|
|
class VLMProcessor(ProcessorMixin):
    """Processor that pairs an image processor with a tokenizer.

    The class-level declarations name the two wrapped components
    (``image_processor`` and ``tokenizer``) and the ``Auto*`` class
    associated with each of them.
    """

    attributes: list[str] = ["image_processor", "tokenizer"]
    image_processor_class: str = "AutoImageProcessor"
    tokenizer_class: str = "AutoTokenizer"

    def __init__(
        self,
        image_processor: AutoImageProcessor = None,
        tokenizer: AutoTokenizer = None,
        **kwargs: Any,
    ):
        """Forward both components and any extra keyword arguments to ``ProcessorMixin``.

        Args:
            image_processor: Image processor component (defaults to ``None``).
            tokenizer: Tokenizer component (defaults to ``None``).
            **kwargs: Passed through unchanged to ``ProcessorMixin.__init__``.
        """
        super().__init__(image_processor, tokenizer, **kwargs)

    @classmethod
    def from_names(cls, image_processor_name: str, tokenizer_name: str, **kwargs: Any):
        """Build a processor by loading each component from its own name.

        Only an allow-listed subset of ``kwargs`` reaches the loaders; every
        other keyword argument is ignored.

        Args:
            image_processor_name: Identifier handed to
                ``AutoImageProcessor.from_pretrained``.
            tokenizer_name: Identifier handed to
                ``AutoTokenizer.from_pretrained``.
            **kwargs: Optional loader settings. ``trust_remote_code`` and
                ``use_fast`` are forwarded to both loaders;
                ``model_max_length`` and ``padding_side`` are forwarded to
                the tokenizer loader only.

        Returns:
            A new instance of ``cls`` wrapping the loaded image processor
            and tokenizer.
        """

        def _select(allowed_keys):
            # Keep the caller's keyword order; drop anything not allow-listed.
            return {
                key: value for key, value in kwargs.items() if key in allowed_keys
            }

        shared_keys = ["trust_remote_code", "use_fast"]
        tokenizer_keys = shared_keys + ["model_max_length", "padding_side"]

        image_loader_kwargs = _select(shared_keys)
        tokenizer_loader_kwargs = _select(tokenizer_keys)

        # The tokenizer is loaded before the image processor.
        loaded_tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name, **tokenizer_loader_kwargs
        )
        loaded_image_processor = AutoImageProcessor.from_pretrained(
            image_processor_name, **image_loader_kwargs
        )
        return cls(image_processor=loaded_image_processor, tokenizer=loaded_tokenizer)
|
|
|
|
|
# Import-time side effect: register VLMProcessor with AutoProcessor as the
# processor class for VLMConfig.
AutoProcessor.register(config_class=VLMConfig, processor_class=VLMProcessor)