Spaces:

ML-INTA
/

ML-Image

Sleeping

App Files Files Community

ML-Image / diffusers /models /modeling_flax_utils.py

ML-INTA

Upload 358 files

7f43c1b over 1 year ago

raw

history blame contribute delete

25.8 kB

	# coding=utf-8
	# Copyright 2023 The HuggingFace Inc. team.
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.

	import os
	from pickle import UnpicklingError
	from typing import Any, Dict, Union

	import jax
	import jax.numpy as jnp
	import msgpack.exceptions
	from flax.core.frozen_dict import FrozenDict, unfreeze
	from flax.serialization import from_bytes, to_bytes
	from flax.traverse_util import flatten_dict, unflatten_dict
	from huggingface_hub import hf_hub_download
	from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError, RevisionNotFoundError
	from requests import HTTPError

	from .. import __version__, is_torch_available
	from ..utils import (
	CONFIG_NAME,
	DIFFUSERS_CACHE,
	FLAX_WEIGHTS_NAME,
	HUGGINGFACE_CO_RESOLVE_ENDPOINT,
	WEIGHTS_NAME,
	logging,
	)
	from .modeling_flax_pytorch_utils import convert_pytorch_state_dict_to_flax


	logger = logging.get_logger(__name__)


	class FlaxModelMixin:
	r"""
	Base class for all flax models.

	[`FlaxModelMixin`] takes care of storing the configuration of the models and handles methods for loading,
	downloading and saving models.
	"""
	config_name = CONFIG_NAME
	_automatically_saved_args = ["_diffusers_version", "_class_name", "_name_or_path"]
	_flax_internal_args = ["name", "parent", "dtype"]

	@classmethod
	def _from_config(cls, config, **kwargs):
	"""
	All context managers that the model should be initialized under go here.
	"""
	return cls(config, **kwargs)

	def _cast_floating_to(self, params: Union[Dict, FrozenDict], dtype: jnp.dtype, mask: Any = None) -> Any:
	"""
	Helper method to cast floating-point values of given parameter `PyTree` to given `dtype`.
	"""

	# taken from https://github.com/deepmind/jmp/blob/3a8318abc3292be38582794dbf7b094e6583b192/jmp/_src/policy.py#L27
	def conditional_cast(param):
	if isinstance(param, jnp.ndarray) and jnp.issubdtype(param.dtype, jnp.floating):
	param = param.astype(dtype)
	return param

	if mask is None:
	return jax.tree_map(conditional_cast, params)

	flat_params = flatten_dict(params)
	flat_mask, _ = jax.tree_flatten(mask)

	for masked, key in zip(flat_mask, flat_params.keys()):
	if masked:
	param = flat_params[key]
	flat_params[key] = conditional_cast(param)

	return unflatten_dict(flat_params)

	def to_bf16(self, params: Union[Dict, FrozenDict], mask: Any = None):
	r"""
	Cast the floating-point `params` to `jax.numpy.bfloat16`. This returns a new `params` tree and does not cast
	the `params` in place.

	This method can be used on TPU to explicitly convert the model parameters to bfloat16 precision to do full
	half-precision training or to save weights in bfloat16 for inference in order to save memory and improve speed.

	Arguments:
	params (`Union[Dict, FrozenDict]`):
	A `PyTree` of model parameters.
	mask (`Union[Dict, FrozenDict]`):
	A `PyTree` with same structure as the `params` tree. The leaves should be booleans, `True` for params
	you want to cast, and should be `False` for those you want to skip.

	Examples:

	```python
	>>> from diffusers import FlaxUNet2DConditionModel

	>>> # load model
	>>> model, params = FlaxUNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5")
	>>> # By default, the model parameters will be in fp32 precision, to cast these to bfloat16 precision
	>>> params = model.to_bf16(params)
	>>> # If you don't want to cast certain parameters (for example layer norm bias and scale)
	>>> # then pass the mask as follows
	>>> from flax import traverse_util

	>>> model, params = FlaxUNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5")
	>>> flat_params = traverse_util.flatten_dict(params)
	>>> mask = {
	... path: (path[-2] != ("LayerNorm", "bias") and path[-2:] != ("LayerNorm", "scale"))
	... for path in flat_params
	... }
	>>> mask = traverse_util.unflatten_dict(mask)
	>>> params = model.to_bf16(params, mask)
	```"""
	return self._cast_floating_to(params, jnp.bfloat16, mask)

	def to_fp32(self, params: Union[Dict, FrozenDict], mask: Any = None):
	r"""
	Cast the floating-point `params` to `jax.numpy.float32`. This method can be used to explicitly convert the
	model parameters to fp32 precision. This returns a new `params` tree and does not cast the `params` in place.

	Arguments:
	params (`Union[Dict, FrozenDict]`):
	A `PyTree` of model parameters.
	mask (`Union[Dict, FrozenDict]`):
	A `PyTree` with same structure as the `params` tree. The leaves should be booleans, `True` for params
	you want to cast, and should be `False` for those you want to skip

	Examples:

	```python
	>>> from diffusers import FlaxUNet2DConditionModel

	>>> # Download model and configuration from huggingface.co
	>>> model, params = FlaxUNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5")
	>>> # By default, the model params will be in fp32, to illustrate the use of this method,
	>>> # we'll first cast to fp16 and back to fp32
	>>> params = model.to_f16(params)
	>>> # now cast back to fp32
	>>> params = model.to_fp32(params)
	```"""
	return self._cast_floating_to(params, jnp.float32, mask)

	def to_fp16(self, params: Union[Dict, FrozenDict], mask: Any = None):
	r"""
	Cast the floating-point `params` to `jax.numpy.float16`. This returns a new `params` tree and does not cast the
	`params` in place.

	This method can be used on GPU to explicitly convert the model parameters to float16 precision to do full
	half-precision training or to save weights in float16 for inference in order to save memory and improve speed.

	Arguments:
	params (`Union[Dict, FrozenDict]`):
	A `PyTree` of model parameters.
	mask (`Union[Dict, FrozenDict]`):
	A `PyTree` with same structure as the `params` tree. The leaves should be booleans, `True` for params
	you want to cast, and should be `False` for those you want to skip

	Examples:

	```python
	>>> from diffusers import FlaxUNet2DConditionModel

	>>> # load model
	>>> model, params = FlaxUNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5")
	>>> # By default, the model params will be in fp32, to cast these to float16
	>>> params = model.to_fp16(params)
	>>> # If you want don't want to cast certain parameters (for example layer norm bias and scale)
	>>> # then pass the mask as follows
	>>> from flax import traverse_util

	>>> model, params = FlaxUNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5")
	>>> flat_params = traverse_util.flatten_dict(params)
	>>> mask = {
	... path: (path[-2] != ("LayerNorm", "bias") and path[-2:] != ("LayerNorm", "scale"))
	... for path in flat_params
	... }
	>>> mask = traverse_util.unflatten_dict(mask)
	>>> params = model.to_fp16(params, mask)
	```"""
	return self._cast_floating_to(params, jnp.float16, mask)

	def init_weights(self, rng: jax.random.KeyArray) -> Dict:
	raise NotImplementedError(f"init_weights method has to be implemented for {self}")

	@classmethod
	def from_pretrained(
	cls,
	pretrained_model_name_or_path: Union[str, os.PathLike],
	dtype: jnp.dtype = jnp.float32,
	*model_args,
	**kwargs,
	):
	r"""
	Instantiate a pretrained flax model from a pre-trained model configuration.

	The warning Weights from XXX not initialized from pretrained model means that the weights of XXX do not come
	pretrained with the rest of the model. It is up to you to train those weights with a downstream fine-tuning
	task.

	The warning Weights from XXX not used in YYY means that the layer XXX is not used by YYY, therefore those
	weights are discarded.

	Parameters:
	pretrained_model_name_or_path (`str` or `os.PathLike`):
	Can be either:

	- A string, the model id of a pretrained model hosted inside a model repo on huggingface.co.
	Valid model ids are namespaced under a user or organization name, like
	`runwayml/stable-diffusion-v1-5`.
	- A path to a directory containing model weights saved using [`~ModelMixin.save_pretrained`],
	e.g., `./my_model_directory/`.
	dtype (`jax.numpy.dtype`, optional, defaults to `jax.numpy.float32`):
	The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
	`jax.numpy.bfloat16` (on TPUs).

	This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
	specified all the computation will be performed with the given `dtype`.

	**Note that this only specifies the dtype of the computation and does not influence the dtype of model
	parameters.**

	If you wish to change the dtype of the model parameters, see [`~ModelMixin.to_fp16`] and
	[`~ModelMixin.to_bf16`].
	model_args (sequence of positional arguments, optional):
	All remaining positional arguments will be passed to the underlying model's `__init__` method.
	cache_dir (`Union[str, os.PathLike]`, optional):
	Path to a directory in which a downloaded pretrained model configuration should be cached if the
	standard cache should not be used.
	force_download (`bool`, optional, defaults to `False`):
	Whether or not to force the (re-)download of the model weights and configuration files, overriding the
	cached versions if they exist.
	resume_download (`bool`, optional, defaults to `False`):
	Whether or not to delete incompletely received files. Will attempt to resume the download if such a
	file exists.
	proxies (`Dict[str, str]`, optional):
	A dictionary of proxy servers to use by protocol or endpoint, e.g., `{'http': 'foo.bar:3128',
	'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
	local_files_only(`bool`, optional, defaults to `False`):
	Whether or not to only look at local files (i.e., do not try to download the model).
	revision (`str`, optional, defaults to `"main"`):
	The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
	git-based system for storing models and other artifacts on huggingface.co, so `revision` can be any
	identifier allowed by git.
	from_pt (`bool`, optional, defaults to `False`):
	Load the model weights from a PyTorch checkpoint save file.
	kwargs (remaining dictionary of keyword arguments, optional):
	Can be used to update the configuration object (after it being loaded) and initiate the model (e.g.,
	`output_attentions=True`). Behaves differently depending on whether a `config` is provided or
	automatically loaded:

	- If a configuration is provided with `config`, `**kwargs` will be directly passed to the
	underlying model's `__init__` method (we assume all relevant updates to the configuration have
	already been done)
	- If a configuration is not provided, `kwargs` will be first passed to the configuration class
	initialization function ([`~ConfigMixin.from_config`]). Each key of `kwargs` that corresponds to
	a configuration attribute will be used to override said attribute with the supplied `kwargs`
	value. Remaining keys that do not correspond to any configuration attribute will be passed to the
	underlying model's `__init__` function.

	Examples:

	```python
	>>> from diffusers import FlaxUNet2DConditionModel

	>>> # Download model and configuration from huggingface.co and cache.
	>>> model, params = FlaxUNet2DConditionModel.from_pretrained("runwayml/stable-diffusion-v1-5")
	>>> # Model was saved using save_pretrained('./test/saved_model/') (for example purposes, not runnable).
	>>> model, params = FlaxUNet2DConditionModel.from_pretrained("./test/saved_model/")
	```"""
	config = kwargs.pop("config", None)
	cache_dir = kwargs.pop("cache_dir", DIFFUSERS_CACHE)
	force_download = kwargs.pop("force_download", False)
	from_pt = kwargs.pop("from_pt", False)
	resume_download = kwargs.pop("resume_download", False)
	proxies = kwargs.pop("proxies", None)
	local_files_only = kwargs.pop("local_files_only", False)
	use_auth_token = kwargs.pop("use_auth_token", None)
	revision = kwargs.pop("revision", None)
	subfolder = kwargs.pop("subfolder", None)

	user_agent = {
	"diffusers": __version__,
	"file_type": "model",
	"framework": "flax",
	}

	# Load config if we don't provide a configuration
	config_path = config if config is not None else pretrained_model_name_or_path
	model, model_kwargs = cls.from_config(
	config_path,
	cache_dir=cache_dir,
	return_unused_kwargs=True,
	force_download=force_download,
	resume_download=resume_download,
	proxies=proxies,
	local_files_only=local_files_only,
	use_auth_token=use_auth_token,
	revision=revision,
	subfolder=subfolder,
	# model args
	dtype=dtype,
	**kwargs,
	)

	# Load model
	pretrained_path_with_subfolder = (
	pretrained_model_name_or_path
	if subfolder is None
	else os.path.join(pretrained_model_name_or_path, subfolder)
	)
	if os.path.isdir(pretrained_path_with_subfolder):
	if from_pt:
	if not os.path.isfile(os.path.join(pretrained_path_with_subfolder, WEIGHTS_NAME)):
	raise EnvironmentError(
	f"Error no file named {WEIGHTS_NAME} found in directory {pretrained_path_with_subfolder} "
	)
	model_file = os.path.join(pretrained_path_with_subfolder, WEIGHTS_NAME)
	elif os.path.isfile(os.path.join(pretrained_path_with_subfolder, FLAX_WEIGHTS_NAME)):
	# Load from a Flax checkpoint
	model_file = os.path.join(pretrained_path_with_subfolder, FLAX_WEIGHTS_NAME)
	# Check if pytorch weights exist instead
	elif os.path.isfile(os.path.join(pretrained_path_with_subfolder, WEIGHTS_NAME)):
	raise EnvironmentError(
	f"{WEIGHTS_NAME} file found in directory {pretrained_path_with_subfolder}. Please load the model"
	" using `from_pt=True`."
	)
	else:
	raise EnvironmentError(
	f"Error no file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME} found in directory "
	f"{pretrained_path_with_subfolder}."
	)
	else:
	try:
	model_file = hf_hub_download(
	pretrained_model_name_or_path,
	filename=FLAX_WEIGHTS_NAME if not from_pt else WEIGHTS_NAME,
	cache_dir=cache_dir,
	force_download=force_download,
	proxies=proxies,
	resume_download=resume_download,
	local_files_only=local_files_only,
	use_auth_token=use_auth_token,
	user_agent=user_agent,
	subfolder=subfolder,
	revision=revision,
	)

	except RepositoryNotFoundError:
	raise EnvironmentError(
	f"{pretrained_model_name_or_path} is not a local folder and is not a valid model identifier "
	"listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a "
	"token having permission to this repo with `use_auth_token` or log in with `huggingface-cli "
	"login`."
	)
	except RevisionNotFoundError:
	raise EnvironmentError(
	f"{revision} is not a valid git identifier (branch name, tag name or commit id) that exists for "
	"this model name. Check the model page at "
	f"'https://huggingface.co/{pretrained_model_name_or_path}' for available revisions."
	)
	except EntryNotFoundError:
	raise EnvironmentError(
	f"{pretrained_model_name_or_path} does not appear to have a file named {FLAX_WEIGHTS_NAME}."
	)
	except HTTPError as err:
	raise EnvironmentError(
	f"There was a specific connection error when trying to load {pretrained_model_name_or_path}:\n"
	f"{err}"
	)
	except ValueError:
	raise EnvironmentError(
	f"We couldn't connect to '{HUGGINGFACE_CO_RESOLVE_ENDPOINT}' to load this model, couldn't find it"
	f" in the cached files and it looks like {pretrained_model_name_or_path} is not the path to a"
	f" directory containing a file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME}.\nCheckout your"
	" internet connection or see how to run the library in offline mode at"
	" 'https://huggingface.co/docs/transformers/installation#offline-mode'."
	)
	except EnvironmentError:
	raise EnvironmentError(
	f"Can't load the model for '{pretrained_model_name_or_path}'. If you were trying to load it from "
	"'https://huggingface.co/models', make sure you don't have a local directory with the same name. "
	f"Otherwise, make sure '{pretrained_model_name_or_path}' is the correct path to a directory "
	f"containing a file named {FLAX_WEIGHTS_NAME} or {WEIGHTS_NAME}."
	)

	if from_pt:
	if is_torch_available():
	from .modeling_utils import load_state_dict
	else:
	raise EnvironmentError(
	"Can't load the model in PyTorch format because PyTorch is not installed. "
	"Please, install PyTorch or use native Flax weights."
	)

	# Step 1: Get the pytorch file
	pytorch_model_file = load_state_dict(model_file)

	# Step 2: Convert the weights
	state = convert_pytorch_state_dict_to_flax(pytorch_model_file, model)
	else:
	try:
	with open(model_file, "rb") as state_f:
	state = from_bytes(cls, state_f.read())
	except (UnpicklingError, msgpack.exceptions.ExtraData) as e:
	try:
	with open(model_file) as f:
	if f.read().startswith("version"):
	raise OSError(
	"You seem to have cloned a repository without having git-lfs installed. Please"
	" install git-lfs and run `git lfs install` followed by `git lfs pull` in the"
	" folder you cloned."
	)
	else:
	raise ValueError from e
	except (UnicodeDecodeError, ValueError):
	raise EnvironmentError(f"Unable to convert {model_file} to Flax deserializable object. ")
	# make sure all arrays are stored as jnp.ndarray
	# NOTE: This is to prevent a bug this will be fixed in Flax >= v0.3.4:
	# https://github.com/google/flax/issues/1261
	state = jax.tree_util.tree_map(lambda x: jax.device_put(x, jax.devices("cpu")[0]), state)

	# flatten dicts
	state = flatten_dict(state)

	params_shape_tree = jax.eval_shape(model.init_weights, rng=jax.random.PRNGKey(0))
	required_params = set(flatten_dict(unfreeze(params_shape_tree)).keys())

	shape_state = flatten_dict(unfreeze(params_shape_tree))

	missing_keys = required_params - set(state.keys())
	unexpected_keys = set(state.keys()) - required_params

	if missing_keys:
	logger.warning(
	f"The checkpoint {pretrained_model_name_or_path} is missing required keys: {missing_keys}. "
	"Make sure to call model.init_weights to initialize the missing weights."
	)
	cls._missing_keys = missing_keys

	for key in state.keys():
	if key in shape_state and state[key].shape != shape_state[key].shape:
	raise ValueError(
	f"Trying to load the pretrained weight for {key} failed: checkpoint has shape "
	f"{state[key].shape} which is incompatible with the model shape {shape_state[key].shape}. "
	)

	# remove unexpected keys to not be saved again
	for unexpected_key in unexpected_keys:
	del state[unexpected_key]

	if len(unexpected_keys) > 0:
	logger.warning(
	f"Some weights of the model checkpoint at {pretrained_model_name_or_path} were not used when"
	f" initializing {model.__class__.__name__}: {unexpected_keys}\n- This IS expected if you are"
	f" initializing {model.__class__.__name__} from the checkpoint of a model trained on another task or"
	" with another architecture."
	)
	else:
	logger.info(f"All model checkpoint weights were used when initializing {model.__class__.__name__}.\n")

	if len(missing_keys) > 0:
	logger.warning(
	f"Some weights of {model.__class__.__name__} were not initialized from the model checkpoint at"
	f" {pretrained_model_name_or_path} and are newly initialized: {missing_keys}\nYou should probably"
	" TRAIN this model on a down-stream task to be able to use it for predictions and inference."
	)
	else:
	logger.info(
	f"All the weights of {model.__class__.__name__} were initialized from the model checkpoint at"
	f" {pretrained_model_name_or_path}.\nIf your task is similar to the task the model of the checkpoint"
	f" was trained on, you can already use {model.__class__.__name__} for predictions without further"
	" training."
	)

	return model, unflatten_dict(state)

	def save_pretrained(
	self,
	save_directory: Union[str, os.PathLike],
	params: Union[Dict, FrozenDict],
	is_main_process: bool = True,
	):
	"""
	Save a model and its configuration file to a directory, so that it can be re-loaded using the
	`[`~FlaxModelMixin.from_pretrained`]` class method

	Arguments:
	save_directory (`str` or `os.PathLike`):
	Directory to which to save. Will be created if it doesn't exist.
	params (`Union[Dict, FrozenDict]`):
	A `PyTree` of model parameters.
	is_main_process (`bool`, optional, defaults to `True`):
	Whether the process calling this is the main process or not. Useful when in distributed training like
	TPUs and need to call this function on all processes. In this case, set `is_main_process=True` only on
	the main process to avoid race conditions.
	"""
	if os.path.isfile(save_directory):
	logger.error(f"Provided path ({save_directory}) should be a directory, not a file")
	return

	os.makedirs(save_directory, exist_ok=True)

	model_to_save = self

	# Attach architecture to the config
	# Save the config
	if is_main_process:
	model_to_save.save_config(save_directory)

	# save model
	output_model_file = os.path.join(save_directory, FLAX_WEIGHTS_NAME)
	with open(output_model_file, "wb") as f:
	model_bytes = to_bytes(params)
	f.write(model_bytes)

	logger.info(f"Model weights saved in {output_model_file}")