Upload microsoft_phi-4_2025-05-27-09-33-47.json
Browse files
server executed with ./llama-server \
	-m ./phi-4-Q8_0.gguf       \
	-c 16384                   \
	-np 64                     \
	-ngl 99                    \
	-fa                        \
	-t 8                       \
	--host 0.0.0.0 --port 8000
and benchmarked via sudo docker run --network host -e HF_TOKEN=$HF_TOKEN        -v ~/inference-benchmarker-results:/opt/inference-benchmarker/results    inference_benchmarker inference-benchmarker     --prompt-options "num_tokens=8000,max_tokens=8020,min_tokens=7980,variance=10"   --decode-options "num_tokens=8000,max_tokens=8020,min_tokens=7980,variance=10"  --url $URL --rates 1.0 --rates 10.0 --rates 30.0 --rates 100.0   --max-vus 800 --duration 120s --warmup 30s --benchmark-kind rate        --model-name "phi-4" --tokenizer-name "microsoft/phi-4"
| @@ -0,0 +1,296 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            {
         | 
| 2 | 
            +
            	"config": {
         | 
| 3 | 
            +
            		"max_vus": 800,
         | 
| 4 | 
            +
            		"duration_secs": 120,
         | 
| 5 | 
            +
            		"benchmark_kind": "Rate",
         | 
| 6 | 
            +
            		"warmup_duration_secs": 30,
         | 
| 7 | 
            +
            		"rates": [
         | 
| 8 | 
            +
            			1.0,
         | 
| 9 | 
            +
            			10.0,
         | 
| 10 | 
            +
            			30.0,
         | 
| 11 | 
            +
            			100.0
         | 
| 12 | 
            +
            		],
         | 
| 13 | 
            +
            		"num_rates": 10,
         | 
| 14 | 
            +
            		"prompt_options": {
         | 
| 15 | 
            +
            			"num_tokens": 8000,
         | 
| 16 | 
            +
            			"min_tokens": 7980,
         | 
| 17 | 
            +
            			"max_tokens": 8020,
         | 
| 18 | 
            +
            			"variance": 10
         | 
| 19 | 
            +
            		},
         | 
| 20 | 
            +
            		"decode_options": {
         | 
| 21 | 
            +
            			"num_tokens": 8000,
         | 
| 22 | 
            +
            			"min_tokens": 7980,
         | 
| 23 | 
            +
            			"max_tokens": 8020,
         | 
| 24 | 
            +
            			"variance": 10
         | 
| 25 | 
            +
            		},
         | 
| 26 | 
            +
            		"tokenizer": "microsoft/phi-4",
         | 
| 27 | 
            +
            		"model_name": "phi-4",
         | 
| 28 | 
            +
            		"profile": null,
         | 
| 29 | 
            +
            		"meta": null,
         | 
| 30 | 
            +
            		"run_id": "llama.cpp -np 64 -fa: unsloth/phi-4-GGUF:Q8_0 (8000 tokens)"
         | 
| 31 | 
            +
            	},
         | 
| 32 | 
            +
            	"results": [
         | 
| 33 | 
            +
            		{
         | 
| 34 | 
            +
            			"id": "warmup",
         | 
| 35 | 
            +
            			"executor_type": "ConstantVUs",
         | 
| 36 | 
            +
            			"config": {
         | 
| 37 | 
            +
            				"max_vus": 1,
         | 
| 38 | 
            +
            				"duration_secs": 30,
         | 
| 39 | 
            +
            				"rate": null
         | 
| 40 | 
            +
            			},
         | 
| 41 | 
            +
            			"total_requests": 27,
         | 
| 42 | 
            +
            			"total_tokens": 9718,
         | 
| 43 | 
            +
            			"token_throughput_secs": 81.74082189646838,
         | 
| 44 | 
            +
            			"duration_ms": 118887,
         | 
| 45 | 
            +
            			"time_to_first_token_ms": {
         | 
| 46 | 
            +
            				"p50": 48.61,
         | 
| 47 | 
            +
            				"p60": 48.891,
         | 
| 48 | 
            +
            				"p70": 49.168,
         | 
| 49 | 
            +
            				"p80": 49.498,
         | 
| 50 | 
            +
            				"p90": 50.034,
         | 
| 51 | 
            +
            				"p95": 51.088,
         | 
| 52 | 
            +
            				"p99": 261.014,
         | 
| 53 | 
            +
            				"avg": 59.311
         | 
| 54 | 
            +
            			},
         | 
| 55 | 
            +
            			"inter_token_latency_ms": {
         | 
| 56 | 
            +
            				"p50": 12.017,
         | 
| 57 | 
            +
            				"p60": 12.026,
         | 
| 58 | 
            +
            				"p70": 12.064,
         | 
| 59 | 
            +
            				"p80": 12.082,
         | 
| 60 | 
            +
            				"p90": 12.093,
         | 
| 61 | 
            +
            				"p95": 12.101,
         | 
| 62 | 
            +
            				"p99": 12.253,
         | 
| 63 | 
            +
            				"avg": 12.029
         | 
| 64 | 
            +
            			},
         | 
| 65 | 
            +
            			"failed_requests": 0,
         | 
| 66 | 
            +
            			"successful_requests": 27,
         | 
| 67 | 
            +
            			"request_rate": 0.22710456793626735,
         | 
| 68 | 
            +
            			"total_tokens_sent": 216000,
         | 
| 69 | 
            +
            			"e2e_latency_ms": {
         | 
| 70 | 
            +
            				"p50": 757.477,
         | 
| 71 | 
            +
            				"p60": 758.668,
         | 
| 72 | 
            +
            				"p70": 760.92,
         | 
| 73 | 
            +
            				"p80": 762.799,
         | 
| 74 | 
            +
            				"p90": 1499.47,
         | 
| 75 | 
            +
            				"p95": 1531.676,
         | 
| 76 | 
            +
            				"p99": 71879.405,
         | 
| 77 | 
            +
            				"avg": 4403.016
         | 
| 78 | 
            +
            			}
         | 
| 79 | 
            +
            		},
         | 
| 80 | 
            +
            		{
         | 
| 81 | 
            +
            			"id": "constant@1.00req/s",
         | 
| 82 | 
            +
            			"executor_type": "ConstantArrivalRate",
         | 
| 83 | 
            +
            			"config": {
         | 
| 84 | 
            +
            				"max_vus": 800,
         | 
| 85 | 
            +
            				"duration_secs": 120,
         | 
| 86 | 
            +
            				"rate": 1.0
         | 
| 87 | 
            +
            			},
         | 
| 88 | 
            +
            			"total_requests": 106,
         | 
| 89 | 
            +
            			"total_tokens": 8313,
         | 
| 90 | 
            +
            			"token_throughput_secs": 70.87113803161708,
         | 
| 91 | 
            +
            			"duration_ms": 117297,
         | 
| 92 | 
            +
            			"time_to_first_token_ms": {
         | 
| 93 | 
            +
            				"p50": 65.923,
         | 
| 94 | 
            +
            				"p60": 68.563,
         | 
| 95 | 
            +
            				"p70": 70.138,
         | 
| 96 | 
            +
            				"p80": 72.142,
         | 
| 97 | 
            +
            				"p90": 75.565,
         | 
| 98 | 
            +
            				"p95": 86.811,
         | 
| 99 | 
            +
            				"p99": 185.545,
         | 
| 100 | 
            +
            				"avg": 71.059
         | 
| 101 | 
            +
            			},
         | 
| 102 | 
            +
            			"inter_token_latency_ms": {
         | 
| 103 | 
            +
            				"p50": 23.858,
         | 
| 104 | 
            +
            				"p60": 24.409,
         | 
| 105 | 
            +
            				"p70": 25.174,
         | 
| 106 | 
            +
            				"p80": 26.634,
         | 
| 107 | 
            +
            				"p90": 30.208,
         | 
| 108 | 
            +
            				"p95": 32.664,
         | 
| 109 | 
            +
            				"p99": 34.869,
         | 
| 110 | 
            +
            				"avg": 23.811
         | 
| 111 | 
            +
            			},
         | 
| 112 | 
            +
            			"failed_requests": 0,
         | 
| 113 | 
            +
            			"successful_requests": 106,
         | 
| 114 | 
            +
            			"request_rate": 0.9036858692832203,
         | 
| 115 | 
            +
            			"total_tokens_sent": 848000,
         | 
| 116 | 
            +
            			"e2e_latency_ms": {
         | 
| 117 | 
            +
            				"p50": 1513.038,
         | 
| 118 | 
            +
            				"p60": 1568.321,
         | 
| 119 | 
            +
            				"p70": 1768.852,
         | 
| 120 | 
            +
            				"p80": 1985.17,
         | 
| 121 | 
            +
            				"p90": 2476.386,
         | 
| 122 | 
            +
            				"p95": 2851.038,
         | 
| 123 | 
            +
            				"p99": 4129.591,
         | 
| 124 | 
            +
            				"avg": 1925.59
         | 
| 125 | 
            +
            			}
         | 
| 126 | 
            +
            		},
         | 
| 127 | 
            +
            		{
         | 
| 128 | 
            +
            			"id": "constant@10.00req/s",
         | 
| 129 | 
            +
            			"executor_type": "ConstantArrivalRate",
         | 
| 130 | 
            +
            			"config": {
         | 
| 131 | 
            +
            				"max_vus": 800,
         | 
| 132 | 
            +
            				"duration_secs": 120,
         | 
| 133 | 
            +
            				"rate": 10.0
         | 
| 134 | 
            +
            			},
         | 
| 135 | 
            +
            			"total_requests": 446,
         | 
| 136 | 
            +
            			"total_tokens": 30228,
         | 
| 137 | 
            +
            			"token_throughput_secs": 252.13178505255695,
         | 
| 138 | 
            +
            			"duration_ms": 119889,
         | 
| 139 | 
            +
            			"time_to_first_token_ms": {
         | 
| 140 | 
            +
            				"p50": 17218.753,
         | 
| 141 | 
            +
            				"p60": 24217.22,
         | 
| 142 | 
            +
            				"p70": 31635.918,
         | 
| 143 | 
            +
            				"p80": 40705.096,
         | 
| 144 | 
            +
            				"p90": 50007.449,
         | 
| 145 | 
            +
            				"p95": 55325.95,
         | 
| 146 | 
            +
            				"p99": 60904.405,
         | 
| 147 | 
            +
            				"avg": 21707.867
         | 
| 148 | 
            +
            			},
         | 
| 149 | 
            +
            			"inter_token_latency_ms": {
         | 
| 150 | 
            +
            				"p50": 147.701,
         | 
| 151 | 
            +
            				"p60": 149.864,
         | 
| 152 | 
            +
            				"p70": 152.119,
         | 
| 153 | 
            +
            				"p80": 154.926,
         | 
| 154 | 
            +
            				"p90": 160.571,
         | 
| 155 | 
            +
            				"p95": 165.906,
         | 
| 156 | 
            +
            				"p99": 172.958,
         | 
| 157 | 
            +
            				"avg": 147.392
         | 
| 158 | 
            +
            			},
         | 
| 159 | 
            +
            			"failed_requests": 9,
         | 
| 160 | 
            +
            			"successful_requests": 437,
         | 
| 161 | 
            +
            			"request_rate": 3.645017535661221,
         | 
| 162 | 
            +
            			"total_tokens_sent": 3496000,
         | 
| 163 | 
            +
            			"e2e_latency_ms": {
         | 
| 164 | 
            +
            				"p50": 28135.612,
         | 
| 165 | 
            +
            				"p60": 34510.797,
         | 
| 166 | 
            +
            				"p70": 41724.159,
         | 
| 167 | 
            +
            				"p80": 51495.322,
         | 
| 168 | 
            +
            				"p90": 60757.905,
         | 
| 169 | 
            +
            				"p95": 66146.289,
         | 
| 170 | 
            +
            				"p99": 71105.223,
         | 
| 171 | 
            +
            				"avg": 31938.065
         | 
| 172 | 
            +
            			}
         | 
| 173 | 
            +
            		},
         | 
| 174 | 
            +
            		{
         | 
| 175 | 
            +
            			"id": "constant@30.00req/s",
         | 
| 176 | 
            +
            			"executor_type": "ConstantArrivalRate",
         | 
| 177 | 
            +
            			"config": {
         | 
| 178 | 
            +
            				"max_vus": 800,
         | 
| 179 | 
            +
            				"duration_secs": 120,
         | 
| 180 | 
            +
            				"rate": 30.0
         | 
| 181 | 
            +
            			},
         | 
| 182 | 
            +
            			"total_requests": 448,
         | 
| 183 | 
            +
            			"total_tokens": 30912,
         | 
| 184 | 
            +
            			"token_throughput_secs": 258.2609449225948,
         | 
| 185 | 
            +
            			"duration_ms": 119692,
         | 
| 186 | 
            +
            			"time_to_first_token_ms": {
         | 
| 187 | 
            +
            				"p50": 25290.398,
         | 
| 188 | 
            +
            				"p60": 35623.865,
         | 
| 189 | 
            +
            				"p70": 46048.766,
         | 
| 190 | 
            +
            				"p80": 60693.417,
         | 
| 191 | 
            +
            				"p90": 75994.066,
         | 
| 192 | 
            +
            				"p95": 84006.657,
         | 
| 193 | 
            +
            				"p99": 94917.151,
         | 
| 194 | 
            +
            				"avg": 33043.743
         | 
| 195 | 
            +
            			},
         | 
| 196 | 
            +
            			"inter_token_latency_ms": {
         | 
| 197 | 
            +
            				"p50": 121.857,
         | 
| 198 | 
            +
            				"p60": 125.877,
         | 
| 199 | 
            +
            				"p70": 130.608,
         | 
| 200 | 
            +
            				"p80": 136.086,
         | 
| 201 | 
            +
            				"p90": 142.114,
         | 
| 202 | 
            +
            				"p95": 144.885,
         | 
| 203 | 
            +
            				"p99": 150.811,
         | 
| 204 | 
            +
            				"avg": 122.097
         | 
| 205 | 
            +
            			},
         | 
| 206 | 
            +
            			"failed_requests": 9,
         | 
| 207 | 
            +
            			"successful_requests": 439,
         | 
| 208 | 
            +
            			"request_rate": 3.6677198117565704,
         | 
| 209 | 
            +
            			"total_tokens_sent": 3512000,
         | 
| 210 | 
            +
            			"e2e_latency_ms": {
         | 
| 211 | 
            +
            				"p50": 36013.727,
         | 
| 212 | 
            +
            				"p60": 45412.83,
         | 
| 213 | 
            +
            				"p70": 56300.618,
         | 
| 214 | 
            +
            				"p80": 70423.796,
         | 
| 215 | 
            +
            				"p90": 83725.896,
         | 
| 216 | 
            +
            				"p95": 94968.614,
         | 
| 217 | 
            +
            				"p99": 102032.047,
         | 
| 218 | 
            +
            				"avg": 41844.204
         | 
| 219 | 
            +
            			}
         | 
| 220 | 
            +
            		},
         | 
| 221 | 
            +
            		{
         | 
| 222 | 
            +
            			"id": "constant@100.00req/s",
         | 
| 223 | 
            +
            			"executor_type": "ConstantArrivalRate",
         | 
| 224 | 
            +
            			"config": {
         | 
| 225 | 
            +
            				"max_vus": 800,
         | 
| 226 | 
            +
            				"duration_secs": 120,
         | 
| 227 | 
            +
            				"rate": 100.0
         | 
| 228 | 
            +
            			},
         | 
| 229 | 
            +
            			"total_requests": 466,
         | 
| 230 | 
            +
            			"total_tokens": 31108,
         | 
| 231 | 
            +
            			"token_throughput_secs": 259.8608345964365,
         | 
| 232 | 
            +
            			"duration_ms": 119710,
         | 
| 233 | 
            +
            			"time_to_first_token_ms": {
         | 
| 234 | 
            +
            				"p50": 34434.388,
         | 
| 235 | 
            +
            				"p60": 44191.564,
         | 
| 236 | 
            +
            				"p70": 56627.842,
         | 
| 237 | 
            +
            				"p80": 71990.192,
         | 
| 238 | 
            +
            				"p90": 87110.437,
         | 
| 239 | 
            +
            				"p95": 97402.223,
         | 
| 240 | 
            +
            				"p99": 105147.119,
         | 
| 241 | 
            +
            				"avg": 40335.484
         | 
| 242 | 
            +
            			},
         | 
| 243 | 
            +
            			"inter_token_latency_ms": {
         | 
| 244 | 
            +
            				"p50": 132.627,
         | 
| 245 | 
            +
            				"p60": 136.975,
         | 
| 246 | 
            +
            				"p70": 140.653,
         | 
| 247 | 
            +
            				"p80": 144.214,
         | 
| 248 | 
            +
            				"p90": 147.981,
         | 
| 249 | 
            +
            				"p95": 150.702,
         | 
| 250 | 
            +
            				"p99": 160.429,
         | 
| 251 | 
            +
            				"avg": 129.4
         | 
| 252 | 
            +
            			},
         | 
| 253 | 
            +
            			"failed_requests": 26,
         | 
| 254 | 
            +
            			"successful_requests": 440,
         | 
| 255 | 
            +
            			"request_rate": 3.6755422149425248,
         | 
| 256 | 
            +
            			"total_tokens_sent": 3520000,
         | 
| 257 | 
            +
            			"e2e_latency_ms": {
         | 
| 258 | 
            +
            				"p50": 43840.122,
         | 
| 259 | 
            +
            				"p60": 55318.742,
         | 
| 260 | 
            +
            				"p70": 69480.146,
         | 
| 261 | 
            +
            				"p80": 81453.487,
         | 
| 262 | 
            +
            				"p90": 97388.642,
         | 
| 263 | 
            +
            				"p95": 106561.334,
         | 
| 264 | 
            +
            				"p99": 113609.244,
         | 
| 265 | 
            +
            				"avg": 49651.897
         | 
| 266 | 
            +
            			}
         | 
| 267 | 
            +
            		}
         | 
| 268 | 
            +
            	],
         | 
| 269 | 
            +
            	"start_time": "2025-05-27T09:16:48.866142727+00:00",
         | 
| 270 | 
            +
            	"end_time": "2025-05-27T09:33:47.431269495+00:00",
         | 
| 271 | 
            +
            	"system": {
         | 
| 272 | 
            +
            		"cpu": [
         | 
| 273 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
         | 
| 274 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
         | 
| 275 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
         | 
| 276 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
         | 
| 277 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
         | 
| 278 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
         | 
| 279 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
         | 
| 280 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
         | 
| 281 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
         | 
| 282 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
         | 
| 283 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
         | 
| 284 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
         | 
| 285 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
         | 
| 286 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
         | 
| 287 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
         | 
| 288 | 
            +
            			"AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
         | 
| 289 | 
            +
            		],
         | 
| 290 | 
            +
            		"memory": "83.47 GB",
         | 
| 291 | 
            +
            		"os_name": "Debian GNU/Linux",
         | 
| 292 | 
            +
            		"os_version": "11",
         | 
| 293 | 
            +
            		"kernel": "5.15.167.4-microsoft-standard-WSL2",
         | 
| 294 | 
            +
            		"hostname": "computer"
         | 
| 295 | 
            +
            	}
         | 
| 296 | 
            +
            }
         | 
