loghugging25 commited on
Commit
f4dc42c
·
1 Parent(s): 008d662

remove long prompt results

Browse files
results/RedHatAI_phi-4-FP8-dynamic_2025-05-21-13-56-47.json DELETED
@@ -1,296 +0,0 @@
1
- {
2
- "config": {
3
- "max_vus": 800,
4
- "duration_secs": 120,
5
- "benchmark_kind": "Rate",
6
- "warmup_duration_secs": 30,
7
- "rates": [
8
- 1.0,
9
- 10.0,
10
- 30.0,
11
- 100.0
12
- ],
13
- "num_rates": 10,
14
- "prompt_options": {
15
- "num_tokens": 8000,
16
- "min_tokens": 7980,
17
- "max_tokens": 8020,
18
- "variance": 10
19
- },
20
- "decode_options": {
21
- "num_tokens": 8000,
22
- "min_tokens": 7980,
23
- "max_tokens": 8020,
24
- "variance": 10
25
- },
26
- "tokenizer": "RedHatAI/phi-4-FP8-dynamic",
27
- "model_name": "phi-4",
28
- "profile": null,
29
- "meta": null,
30
- "run_id": "vLLM: RedHatAI/phi-4-FP8-dynamic (8000 tokens)"
31
- },
32
- "results": [
33
- {
34
- "id": "warmup",
35
- "executor_type": "ConstantVUs",
36
- "config": {
37
- "max_vus": 1,
38
- "duration_secs": 30,
39
- "rate": null
40
- },
41
- "total_requests": 2,
42
- "total_tokens": 1643,
43
- "token_throughput_secs": 38.490013255851395,
44
- "duration_ms": 42686,
45
- "time_to_first_token_ms": {
46
- "p50": 1276.801,
47
- "p60": 1388.913,
48
- "p70": 1501.026,
49
- "p80": 1613.139,
50
- "p90": 1725.252,
51
- "p95": 1781.309,
52
- "p99": 1826.154,
53
- "avg": 1276.801
54
- },
55
- "inter_token_latency_ms": {
56
- "p50": 24.424,
57
- "p60": 24.432,
58
- "p70": 24.44,
59
- "p80": 24.448,
60
- "p90": 24.456,
61
- "p95": 24.46,
62
- "p99": 24.463,
63
- "avg": 24.424
64
- },
65
- "failed_requests": 0,
66
- "successful_requests": 2,
67
- "request_rate": 0.0468533332390157,
68
- "total_tokens_sent": 16000,
69
- "e2e_latency_ms": {
70
- "p50": 21343.075,
71
- "p60": 21391.438,
72
- "p70": 21439.801,
73
- "p80": 21488.164,
74
- "p90": 21536.527,
75
- "p95": 21560.709,
76
- "p99": 21580.054,
77
- "avg": 21343.075
78
- }
79
- },
80
- {
81
- "id": "[email protected]/s",
82
- "executor_type": "ConstantArrivalRate",
83
- "config": {
84
- "max_vus": 800,
85
- "duration_secs": 120,
86
- "rate": 1.0
87
- },
88
- "total_requests": 90,
89
- "total_tokens": 55892,
90
- "token_throughput_secs": 478.696852515677,
91
- "duration_ms": 116758,
92
- "time_to_first_token_ms": {
93
- "p50": 118.856,
94
- "p60": 124.707,
95
- "p70": 131.654,
96
- "p80": 135.562,
97
- "p90": 145.529,
98
- "p95": 150.366,
99
- "p99": 715.649,
100
- "avg": 128.611
101
- },
102
- "inter_token_latency_ms": {
103
- "p50": 45.758,
104
- "p60": 46.229,
105
- "p70": 46.314,
106
- "p80": 46.373,
107
- "p90": 46.483,
108
- "p95": 46.581,
109
- "p99": 46.871,
110
- "avg": 43.271
111
- },
112
- "failed_requests": 0,
113
- "successful_requests": 90,
114
- "request_rate": 0.7708208102485317,
115
- "total_tokens_sent": 720000,
116
- "e2e_latency_ms": {
117
- "p50": 27887.256,
118
- "p60": 30188.411,
119
- "p70": 31661.903,
120
- "p80": 35685.812,
121
- "p90": 45661.636,
122
- "p95": 50093.628,
123
- "p99": 59727.184,
124
- "avg": 27093.895
125
- }
126
- },
127
- {
128
- "id": "[email protected]/s",
129
- "executor_type": "ConstantArrivalRate",
130
- "config": {
131
- "max_vus": 800,
132
- "duration_secs": 120,
133
- "rate": 10.0
134
- },
135
- "total_requests": 97,
136
- "total_tokens": 45779,
137
- "token_throughput_secs": 385.8671945353039,
138
- "duration_ms": 118639,
139
- "time_to_first_token_ms": {
140
- "p50": 264.625,
141
- "p60": 314.639,
142
- "p70": 341.786,
143
- "p80": 416.021,
144
- "p90": 502.604,
145
- "p95": 608.336,
146
- "p99": 712.908,
147
- "avg": 278.878
148
- },
149
- "inter_token_latency_ms": {
150
- "p50": 152.068,
151
- "p60": 183.639,
152
- "p70": 208.294,
153
- "p80": 210.057,
154
- "p90": 211.894,
155
- "p95": 421.244,
156
- "p99": 436.578,
157
- "avg": 190.502
158
- },
159
- "failed_requests": 0,
160
- "successful_requests": 97,
161
- "request_rate": 0.8176045319890011,
162
- "total_tokens_sent": 776000,
163
- "e2e_latency_ms": {
164
- "p50": 89809.719,
165
- "p60": 90599.198,
166
- "p70": 97086.861,
167
- "p80": 97763.592,
168
- "p90": 102705.608,
169
- "p95": 105891.319,
170
- "p99": 109209.372,
171
- "avg": 80168.287
172
- }
173
- },
174
- {
175
- "id": "[email protected]/s",
176
- "executor_type": "ConstantArrivalRate",
177
- "config": {
178
- "max_vus": 800,
179
- "duration_secs": 120,
180
- "rate": 30.0
181
- },
182
- "total_requests": 108,
183
- "total_tokens": 48755,
184
- "token_throughput_secs": 408.5182278415837,
185
- "duration_ms": 119345,
186
- "time_to_first_token_ms": {
187
- "p50": 315.639,
188
- "p60": 364.113,
189
- "p70": 440.936,
190
- "p80": 517.15,
191
- "p90": 635.496,
192
- "p95": 743.467,
193
- "p99": 886.077,
194
- "avg": 348.945
195
- },
196
- "inter_token_latency_ms": {
197
- "p50": 172.827,
198
- "p60": 189.057,
199
- "p70": 196.538,
200
- "p80": 201.266,
201
- "p90": 442.975,
202
- "p95": 465.991,
203
- "p99": 473.842,
204
- "avg": 207.845
205
- },
206
- "failed_requests": 0,
207
- "successful_requests": 108,
208
- "request_rate": 0.9049321835071489,
209
- "total_tokens_sent": 864000,
210
- "e2e_latency_ms": {
211
- "p50": 89868.756,
212
- "p60": 96902.23,
213
- "p70": 98937.333,
214
- "p80": 102789.849,
215
- "p90": 109541.9,
216
- "p95": 111388.456,
217
- "p99": 114281.927,
218
- "avg": 82072.638
219
- }
220
- },
221
- {
222
- "id": "[email protected]/s",
223
- "executor_type": "ConstantArrivalRate",
224
- "config": {
225
- "max_vus": 800,
226
- "duration_secs": 120,
227
- "rate": 100.0
228
- },
229
- "total_requests": 125,
230
- "total_tokens": 57918,
231
- "token_throughput_secs": 485.359321343381,
232
- "duration_ms": 119330,
233
- "time_to_first_token_ms": {
234
- "p50": 1154.434,
235
- "p60": 1276.393,
236
- "p70": 1440.368,
237
- "p80": 1604.069,
238
- "p90": 1768.54,
239
- "p95": 1850.13,
240
- "p99": 1919.678,
241
- "avg": 1208.132
242
- },
243
- "inter_token_latency_ms": {
244
- "p50": 166.875,
245
- "p60": 166.884,
246
- "p70": 167.245,
247
- "p80": 188.28,
248
- "p90": 350.172,
249
- "p95": 417.485,
250
- "p99": 437.566,
251
- "avg": 186.06
252
- },
253
- "failed_requests": 0,
254
- "successful_requests": 125,
255
- "request_rate": 1.047513988188864,
256
- "total_tokens_sent": 1000000,
257
- "e2e_latency_ms": {
258
- "p50": 82803.004,
259
- "p60": 89976.229,
260
- "p70": 90374.914,
261
- "p80": 99727.225,
262
- "p90": 108866.194,
263
- "p95": 113444.528,
264
- "p99": 116545.189,
265
- "avg": 77917.015
266
- }
267
- }
268
- ],
269
- "start_time": "2025-05-21T13:41:44.260015742+00:00",
270
- "end_time": "2025-05-21T13:56:47.150683889+00:00",
271
- "system": {
272
- "cpu": [
273
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu0@4699MHz",
274
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu1@4699MHz",
275
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu2@4699MHz",
276
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu3@4699MHz",
277
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu4@4699MHz",
278
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu5@4699MHz",
279
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu6@4699MHz",
280
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu7@4699MHz",
281
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu8@4699MHz",
282
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu9@4699MHz",
283
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu10@4699MHz",
284
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu11@4699MHz",
285
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu12@4699MHz",
286
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu13@4699MHz",
287
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu14@4699MHz",
288
- "AMD Ryzen 7 9800X3D 8-Core Processor cpu15@4699MHz"
289
- ],
290
- "memory": "83.47 GB",
291
- "os_name": "Debian GNU/Linux",
292
- "os_version": "11",
293
- "kernel": "5.15.167.4-microsoft-standard-WSL2",
294
- "hostname": "computer"
295
- }
296
- }