Bitsandbytes documentation
AdEMAMix
AdEMAMix
AdEMAMix is a variant of the Adam optimizer.
bitsandbytes also supports paged optimizers, which take advantage of CUDA's unified memory to transfer memory from the GPU to the CPU when GPU memory is exhausted.
AdEMAMix
class bitsandbytes.optim.AdEMAMix
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )
__init__
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096, is_paged: bool = False )
AdEMAMix8bit
class bitsandbytes.optim.AdEMAMix8bit
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
__init__
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
AdEMAMix32bit
class bitsandbytes.optim.AdEMAMix32bit
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
__init__
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096, is_paged: bool = False )
PagedAdEMAMix
class bitsandbytes.optim.PagedAdEMAMix
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )
__init__
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, optim_bits: typing.Literal[8, 32] = 32, min_8bit_size: int = 4096 )
PagedAdEMAMix8bit
class bitsandbytes.optim.PagedAdEMAMix8bit
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )
__init__
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )
PagedAdEMAMix32bit
class bitsandbytes.optim.PagedAdEMAMix32bit
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )
__init__
< source >( params: typing.Iterable[torch.nn.parameter.Parameter], lr: float = 0.001, betas: typing.Tuple[float, float, float] = (0.9, 0.999, 0.9999), alpha: float = 5.0, t_alpha: typing.Optional[int] = None, t_beta3: typing.Optional[int] = None, eps: float = 1e-08, weight_decay: float = 0.01, min_8bit_size: int = 4096 )