# tinyllama-test / recipe.yaml
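# One-shot compression recipe: activation equalization, INT8 quantization,
# and 50% unstructured SparseGPT pruning, applied as a single stage.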
test_stage:
  obcq_modifiers:
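    # SmoothQuant-style smoothing with log-scaled factors: each mapping pairs
    # the projections to be smoothed with the preceding norm layer whose
    # output feeds them, taming activation outliers before quantization.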
    LogarithmicEqualizationModifier:
      mappings:
      - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
        - re:.*input_layernorm
      - - ['re:.*gate_proj', 're:.*up_proj']
        - re:.*post_attention_layernorm
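    # INT8 quantization. `ignore` keeps the listed module classes and a
    # handful of individually named layers in full precision;
    # post_oneshot_calibration re-runs calibration after the one-shot pass.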
    QuantizationModifier:
      ignore: [LlamaRotaryEmbedding, LlamaRMSNorm, SiLUActivation, MatMulOutput_QK, MatMulOutput_PV,
        model.layers.21.mlp.down_proj, model.layers.7.mlp.down_proj, model.layers.2.mlp.down_proj,
        model.layers.8.self_attn.q_proj, model.layers.8.self_attn.k_proj]
      post_oneshot_calibration: true
      scheme_overrides:
        Linear:
          weights: {num_bits: 8, symmetric: true, strategy: channel}
        MatMulLeftInput_QK:
          input_activations: {num_bits: 8, symmetric: true}
        Embedding:
          input_activations: null
          weights: {num_bits: 8, symmetric: false}
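    # One-shot 50% unstructured pruning (mask_structure 0:0) of every decoder
    # layer via SparseGPT, solving 128 columns per block with 1% Hessian
    # dampening; quantize: true folds quantization into the same pass.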
    SparseGPTModifier:
      sparsity: 0.5
      block_size: 128
      sequential_update: false
      quantize: true
      percdamp: 0.01
      mask_structure: 0:0
      targets: ['re:model.layers.\d*$']
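
# Applying this recipe (a minimal sketch, assuming SparseML's one-shot
# entrypoint and a TinyLlama base checkpoint; the module path, argument
# names, and calibration dataset below are illustrative and may differ
# across SparseML versions):
#
#   from sparseml.transformers import oneshot
#
#   oneshot(
#       model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",  # assumed base model
#       dataset="open_platypus",                     # assumed calibration set
#       recipe="recipe.yaml",                        # this file
#       max_seq_length=512,
#       num_calibration_samples=512,
#       output_dir="./tinyllama-test",
#   )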