textgeflecht committed on
Commit
f48b23e
·
verified ·
1 Parent(s): 0eec002

Upload microsoft_phi-4_2025-05-28-14-33-07.json

Browse files

ik_llama.cpp server: ./llama-server -m ./phi-4-Q8_0.gguf -c 16384 -ngl 99 -np 8 -fa --cont-batching --host 0.0.0.0 --port 8000
benchmark: sudo docker run --network host -e HF_TOKEN=$HF_TOKEN -v ~/inference-benchmarker-results:/opt/inference-benchmarker/results inference_benchmarker inference-benchmarker --no-console --prompt-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" --decode-options "num_tokens=200,max_tokens=220,min_tokens=180,variance=10" --url $URL --rates 1.0 --rates 10.0 --rates 30.0 --rates 100.0 --max-vus 8 --duration 120s --warmup 30s --benchmark-kind rate --model-name "phi-4" --tokenizer-name "microsoft/phi-4"

microsoft_phi-4_2025-05-28-14-33-07.json ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "max_vus": 8,
4
+ "duration_secs": 120,
5
+ "benchmark_kind": "Rate",
6
+ "warmup_duration_secs": 30,
7
+ "rates": [
8
+ 1.0,
9
+ 10.0,
10
+ 30.0,
11
+ 100.0
12
+ ],
13
+ "num_rates": 10,
14
+ "prompt_options": {
15
+ "num_tokens": 200,
16
+ "min_tokens": 180,
17
+ "max_tokens": 220,
18
+ "variance": 10
19
+ },
20
+ "decode_options": {
21
+ "num_tokens": 200,
22
+ "min_tokens": 180,
23
+ "max_tokens": 220,
24
+ "variance": 10
25
+ },
26
+ "tokenizer": "microsoft/phi-4",
27
+ "model_name": "phi-4",
28
+ "profile": null,
29
+ "meta": null,
30
+ "run_id": "ik_llama.cpp -np 8 -fa, max-vus 8: unsloth/phi-4-GGUF:Q8_0 (200 tokens)"
31
+ },
32
+ "results": [
33
+ {
34
+ "id": "warmup",
35
+ "executor_type": "ConstantVUs",
36
+ "config": {
37
+ "max_vus": 1,
38
+ "duration_secs": 30,
39
+ "rate": null
40
+ },
41
+ "total_requests": 12,
42
+ "total_tokens": 2308,
43
+ "token_throughput_secs": 82.4664278690674,
44
+ "duration_ms": 27987,
45
+ "time_to_first_token_ms": {
46
+ "p50": 49.879,
47
+ "p60": 53.321,
48
+ "p70": 55.046,
49
+ "p80": 56.736,
50
+ "p90": 59.507,
51
+ "p95": 108.881,
52
+ "p99": 156.897,
53
+ "avg": 59.258
54
+ },
55
+ "inter_token_latency_ms": {
56
+ "p50": 11.875,
57
+ "p60": 12.034,
58
+ "p70": 12.086,
59
+ "p80": 12.097,
60
+ "p90": 12.165,
61
+ "p95": 12.349,
62
+ "p99": 12.521,
63
+ "avg": 11.87
64
+ },
65
+ "failed_requests": 0,
66
+ "successful_requests": 12,
67
+ "request_rate": 0.4287682558183747,
68
+ "total_tokens_sent": 2400,
69
+ "e2e_latency_ms": {
70
+ "p50": 2391.624,
71
+ "p60": 2468.535,
72
+ "p70": 2503.242,
73
+ "p80": 2548.516,
74
+ "p90": 2559.961,
75
+ "p95": 2594.089,
76
+ "p99": 2627.326,
77
+ "avg": 2332.105
78
+ }
79
+ },
80
+ {
81
+ "id": "constant@1.00req/s",
82
+ "executor_type": "ConstantArrivalRate",
83
+ "config": {
84
+ "max_vus": 8,
85
+ "duration_secs": 120,
86
+ "rate": 1.0
87
+ },
88
+ "total_requests": 116,
89
+ "total_tokens": 21863,
90
+ "token_throughput_secs": 184.67660248254646,
91
+ "duration_ms": 118385,
92
+ "time_to_first_token_ms": {
93
+ "p50": 49.419,
94
+ "p60": 51.239,
95
+ "p70": 52.777,
96
+ "p80": 53.873,
97
+ "p90": 56.373,
98
+ "p95": 60.17,
99
+ "p99": 63.231,
100
+ "avg": 49.82
101
+ },
102
+ "inter_token_latency_ms": {
103
+ "p50": 17.018,
104
+ "p60": 17.173,
105
+ "p70": 17.372,
106
+ "p80": 17.563,
107
+ "p90": 17.768,
108
+ "p95": 17.968,
109
+ "p99": 18.132,
110
+ "avg": 17.029
111
+ },
112
+ "failed_requests": 0,
113
+ "successful_requests": 116,
114
+ "request_rate": 0.9798511589432095,
115
+ "total_tokens_sent": 23200,
116
+ "e2e_latency_ms": {
117
+ "p50": 3393.24,
118
+ "p60": 3458.146,
119
+ "p70": 3525.546,
120
+ "p80": 3601.401,
121
+ "p90": 3672.722,
122
+ "p95": 3775.723,
123
+ "p99": 3899.947,
124
+ "avg": 3247.9
125
+ }
126
+ },
127
+ {
128
+ "id": "constant@10.00req/s",
129
+ "executor_type": "ConstantArrivalRate",
130
+ "config": {
131
+ "max_vus": 8,
132
+ "duration_secs": 120,
133
+ "rate": 10.0
134
+ },
135
+ "total_requests": 235,
136
+ "total_tokens": 45584,
137
+ "token_throughput_secs": 381.12691659339066,
138
+ "duration_ms": 119603,
139
+ "time_to_first_token_ms": {
140
+ "p50": 51.704,
141
+ "p60": 53.373,
142
+ "p70": 54.916,
143
+ "p80": 56.524,
144
+ "p90": 59.428,
145
+ "p95": 60.845,
146
+ "p99": 63.778,
147
+ "avg": 51.745
148
+ },
149
+ "inter_token_latency_ms": {
150
+ "p50": 19.964,
151
+ "p60": 20.132,
152
+ "p70": 20.354,
153
+ "p80": 20.523,
154
+ "p90": 20.799,
155
+ "p95": 21.086,
156
+ "p99": 21.468,
157
+ "avg": 20.033
158
+ },
159
+ "failed_requests": 0,
160
+ "successful_requests": 235,
161
+ "request_rate": 1.9648303220306862,
162
+ "total_tokens_sent": 47000,
163
+ "e2e_latency_ms": {
164
+ "p50": 4003.139,
165
+ "p60": 4062.659,
166
+ "p70": 4121.389,
167
+ "p80": 4195.217,
168
+ "p90": 4296.563,
169
+ "p95": 4344.704,
170
+ "p99": 4380.68,
171
+ "avg": 3919.803
172
+ }
173
+ },
174
+ {
175
+ "id": "constant@30.00req/s",
176
+ "executor_type": "ConstantArrivalRate",
177
+ "config": {
178
+ "max_vus": 8,
179
+ "duration_secs": 120,
180
+ "rate": 30.0
181
+ },
182
+ "total_requests": 241,
183
+ "total_tokens": 46298,
184
+ "token_throughput_secs": 387.99671398196426,
185
+ "duration_ms": 119325,
186
+ "time_to_first_token_ms": {
187
+ "p50": 54.681,
188
+ "p60": 56.362,
189
+ "p70": 57.553,
190
+ "p80": 59.078,
191
+ "p90": 62.431,
192
+ "p95": 68.074,
193
+ "p99": 83.383,
194
+ "avg": 55.154
195
+ },
196
+ "inter_token_latency_ms": {
197
+ "p50": 20.023,
198
+ "p60": 20.144,
199
+ "p70": 20.312,
200
+ "p80": 20.486,
201
+ "p90": 20.72,
202
+ "p95": 20.918,
203
+ "p99": 21.329,
204
+ "avg": 20.037
205
+ },
206
+ "failed_requests": 0,
207
+ "successful_requests": 241,
208
+ "request_rate": 2.019681370030096,
209
+ "total_tokens_sent": 48200,
210
+ "e2e_latency_ms": {
211
+ "p50": 4015.72,
212
+ "p60": 4072.497,
213
+ "p70": 4146.357,
214
+ "p80": 4223.948,
215
+ "p90": 4319.526,
216
+ "p95": 4456.223,
217
+ "p99": 4535.434,
218
+ "avg": 3889.656
219
+ }
220
+ },
221
+ {
222
+ "id": "constant@100.00req/s",
223
+ "executor_type": "ConstantArrivalRate",
224
+ "config": {
225
+ "max_vus": 8,
226
+ "duration_secs": 120,
227
+ "rate": 100.0
228
+ },
229
+ "total_requests": 260,
230
+ "total_tokens": 46146,
231
+ "token_throughput_secs": 386.21664733150055,
232
+ "duration_ms": 119482,
233
+ "time_to_first_token_ms": {
234
+ "p50": 56.289,
235
+ "p60": 57.47,
236
+ "p70": 59.33,
237
+ "p80": 62.238,
238
+ "p90": 71.092,
239
+ "p95": 79.577,
240
+ "p99": 189.253,
241
+ "avg": 61.294
242
+ },
243
+ "inter_token_latency_ms": {
244
+ "p50": 20.116,
245
+ "p60": 20.338,
246
+ "p70": 20.55,
247
+ "p80": 20.781,
248
+ "p90": 21.174,
249
+ "p95": 21.285,
250
+ "p99": 22.105,
251
+ "avg": 20.219
252
+ },
253
+ "failed_requests": 0,
254
+ "successful_requests": 260,
255
+ "request_rate": 2.1760570429980963,
256
+ "total_tokens_sent": 52000,
257
+ "e2e_latency_ms": {
258
+ "p50": 3999.759,
259
+ "p60": 4067.239,
260
+ "p70": 4150.176,
261
+ "p80": 4236.082,
262
+ "p90": 4376.27,
263
+ "p95": 4443.535,
264
+ "p99": 4578.458,
265
+ "avg": 3632.546
266
+ }
267
+ }
268
+ ],
269
+ "start_time": "2025-05-28T14:24:39.396108668+00:00",
270
+ "end_time": "2025-05-28T14:33:07.834005623+00:00",
271
+ "system": {
272
+ "cpu": [
273
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
274
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
275
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
276
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
277
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
278
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
279
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
280
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
281
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
282
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
283
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
284
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
285
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
286
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
287
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
288
+ "AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
289
+ ],
290
+ "memory": "83.47 GB",
291
+ "os_name": "Debian GNU/Linux",
292
+ "os_version": "11",
293
+ "kernel": "5.15.167.4-microsoft-standard-WSL2",
294
+ "hostname": "computer"
295
+ }
296
+ }