Bitsandbytes documentation

AdEMAMix

You are viewing v0.45.2 version. A newer version v0.45.4 is available.
Hugging Face's logo
Join the Hugging Face community

and get access to the augmented documentation experience

to get started

AdEMAMix

AdEMAMix is a variant of the Adam optimizer that maintains a mixture of two exponential moving averages of past gradients — a fast-changing EMA and a slow-changing EMA (controlled by the third `betas` value and weighted by `alpha`) — allowing it to make better use of older gradient information.

bitsandbytes also supports paged optimizers, which take advantage of CUDA's unified memory to transfer optimizer state from the GPU to the CPU when GPU memory is exhausted.

AdEMAMix

class bitsandbytes.optim.AdEMAMix

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )

AdEMAMix8bit

class bitsandbytes.optim.AdEMAMix8bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

AdEMAMix32bit

class bitsandbytes.optim.AdEMAMix32bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )

PagedAdEMAMix

class bitsandbytes.optim.PagedAdEMAMix

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )

PagedAdEMAMix8bit

class bitsandbytes.optim.PagedAdEMAMix8bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

PagedAdEMAMix32bit

class bitsandbytes.optim.PagedAdEMAMix32bit

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

__init__

< >

( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )

< > Update on GitHub