llm-profiler / configs /gpu_configs.json
zenghaolun02
add demo
0c4803b
{
"t4-pcie-15gb": {
"name": "t4-pcie-15gb",
"memory_GPU_in_GB": 15,
"hbm_bandwidth_in_GB_per_sec": 300,
"intra_node_bandwidth_in_GB_per_sec": 32,
"peak_fp16_TFLOPS": 65,
"peak_int8_TFLOPS": 130,
"peak_int4_TFLOPS": 260,
"intra_node_min_message_latency": 8e-06
},
"v100-pcie-32gb": {
"name": "v100-pcie-32gb",
"memory_GPU_in_GB": 32,
"hbm_bandwidth_in_GB_per_sec": 900,
"intra_node_bandwidth_in_GB_per_sec": 32,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp16_TFLOPS": 112,
"peak_int8_TFLOPS": 224,
"peak_int4_TFLOPS": 448,
"intra_node_min_message_latency": 8e-06
},
"v100-sxm-32gb": {
"name": "v100-sxm-32gb",
"memory_GPU_in_GB": 32,
"hbm_bandwidth_in_GB_per_sec": 900,
"intra_node_bandwidth_in_GB_per_sec": 300,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp16_TFLOPS": 112,
"peak_int8_TFLOPS": 224,
"peak_int4_TFLOPS": 448,
"intra_node_min_message_latency": 8e-06
},
"br104p": {
"name": "br104p",
"memory_GPU_in_GB": 32,
"hbm_bandwidth_in_GB_per_sec": 819,
"intra_node_bandwidth_in_GB_per_sec": 192,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 256,
"peak_fp16_TFLOPS": 512,
"peak_int8_TFLOPS": 1024,
"intra_node_min_message_latency": 8e-06
},
"a100-pcie-40gb": {
"name": "a100-pcie-40gb",
"memory_GPU_in_GB": 40,
"hbm_bandwidth_in_GB_per_sec": 1555,
"intra_node_bandwidth_in_GB_per_sec": 64,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 156,
"peak_fp16_TFLOPS": 312,
"peak_int8_TFLOPS": 624,
"peak_int4_TFLOPS": 1248,
"intra_node_min_message_latency": 8e-06
},
"a100-sxm-40gb": {
"name": "a100-sxm-40gb",
"memory_GPU_in_GB": 40,
"hbm_bandwidth_in_GB_per_sec": 1555,
"intra_node_bandwidth_in_GB_per_sec": 600,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 156,
"peak_fp16_TFLOPS": 312,
"peak_int8_TFLOPS": 624,
"peak_int4_TFLOPS": 1248,
"intra_node_min_message_latency": 8e-06
},
"a100-pcie-80gb": {
"name": "a100-pcie-80gb",
"memory_GPU_in_GB": 80,
"hbm_bandwidth_in_GB_per_sec": 1935,
"intra_node_bandwidth_in_GB_per_sec": 64,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 156,
"peak_fp16_TFLOPS": 312,
"peak_int8_TFLOPS": 624,
"peak_int4_TFLOPS": 1248,
"intra_node_min_message_latency": 8e-06
},
"a100-sxm-80gb": {
"name": "a100-sxm-80gb",
"memory_GPU_in_GB": 80,
"hbm_bandwidth_in_GB_per_sec": 2039,
"intra_node_bandwidth_in_GB_per_sec": 600,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 156,
"peak_fp16_TFLOPS": 312,
"peak_int8_TFLOPS": 624,
"peak_int4_TFLOPS": 1248,
"intra_node_min_message_latency": 8e-06
},
"910b-64gb": {
"name": "910b-64gb",
"memory_GPU_in_GB": 64,
"hbm_bandwidth_in_GB_per_sec": 460,
"intra_node_bandwidth_in_GB_per_sec": 392,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 188,
"peak_fp16_TFLOPS": 376,
"peak_int8_TFLOPS": 752,
"peak_int4_TFLOPS": 1504,
"intra_node_min_message_latency": 9e-06
},
"h100-sxm-80gb": {
"name": "h100-sxm-80gb",
"memory_GPU_in_GB": 80,
"hbm_bandwidth_in_GB_per_sec": 3430,
"intra_node_bandwidth_in_GB_per_sec": 900,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 989,
"peak_fp16_TFLOPS": 1979,
"peak_int8_TFLOPS": 3958,
"intra_node_min_message_latency": 8e-06
},
"h100-pcie-80gb": {
"name": "h100-pcie-80gb",
"memory_GPU_in_GB": 80,
"hbm_bandwidth_in_GB_per_sec": 2048,
"intra_node_bandwidth_in_GB_per_sec": 128,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 756,
"peak_fp16_TFLOPS": 1513,
"peak_int8_TFLOPS": 3026,
"intra_node_min_message_latency": 8e-06
},
"a30-pcie-24gb": {
"name": "a30-pcie-24gb",
"memory_GPU_in_GB": 24,
"hbm_bandwidth_in_GB_per_sec": 933,
"intra_node_bandwidth_in_GB_per_sec": 64,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 82,
"peak_fp16_TFLOPS": 165,
"peak_int8_TFLOPS": 330,
"peak_int4_TFLOPS": 661,
"intra_node_min_message_latency": 8e-06
},
"a30-sxm-24gb": {
"name": "a30-sxm-24gb",
"memory_GPU_in_GB": 24,
"hbm_bandwidth_in_GB_per_sec": 933,
"intra_node_bandwidth_in_GB_per_sec": 200,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 82,
"peak_fp16_TFLOPS": 165,
"peak_int8_TFLOPS": 330,
"peak_int4_TFLOPS": 661,
"intra_node_min_message_latency": 8e-06
},
"a40-pcie-48gb": {
"name": "a40-pcie-48gb",
"memory_GPU_in_GB": 44.98,
"hbm_bandwidth_in_GB_per_sec": 696,
"intra_node_bandwidth_in_GB_per_sec": 64,
"inter_node_bandwidth_in_GB_per_sec": 200,
"peak_fp32_TFLOPS": 74.8,
"peak_fp16_TFLOPS": 149.7,
"peak_int8_TFLOPS": 299.3,
"peak_int4_TFLOPS": 598.7,
"intra_node_min_message_latency": 8e-06
}
}