Commit 31b5924 · committed by Jae-Won Chung
Parent(s): 9fd9223

Update benchmark.py

Changed files: scripts/benchmark.py (+38 -38)

scripts/benchmark.py CHANGED
@@ -40,12 +40,36 @@ SYSTEM_PROMPTS = {
     ),
 }
 
+class CustomDataset(Dataset):
+    def __init__(self, data):
+        self.data = data
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, index):
+        sample = self.data[index]
+        return sample["conversations"][0]["value"]
+
+
+def dataloader(input_file: str, batch_size: int) -> Generator[tuple[bool, list[str]], None, None]:
+    """Yields a tuple of whether this is a warmup run and the input prompt."""
+    for _ in range(3):
+        yield True, ["Say something long and random. I don't care about the content." for _ in range (batch_size)]
+    data = json.load(open(input_file, "r"))
+    custom_dataset = CustomDataset(data)
+    data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=False)
+    for prompt in data_loader:
+        yield False, prompt
+
+
 @dataclass
 class Output:
     response_length: int
     input: str
     output: str
 
+
 @torch.inference_mode()
 def run_inference(
     model,
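For reference, a minimal driver (not part of the commit) showing how the relocated dataloader() generator behaves: three warmup batches are yielded first, then the real prompts in file order. The toy file path and JSON content are illustrative; they only assume the ShareGPT-style "conversations"/"value" schema that __getitem__ indexes into.

# Illustrative driver, not part of the commit. Assumes CustomDataset and
# dataloader from the hunk above are defined in the same module (or imported,
# e.g. `from benchmark import dataloader` when run next to scripts/benchmark.py).
import json

toy_input = "/tmp/toy_prompts.json"  # hypothetical path
with open(toy_input, "w") as f:
    json.dump(
        [{"conversations": [{"from": "human", "value": f"Prompt {i}"}]} for i in range(5)],
        f,
    )

for is_warmup, batch in dataloader(toy_input, batch_size=2):
    # Three warmup batches come first, then the real data in order.
    # With shuffle=False and the default drop_last=False, the final batch
    # may be shorter than batch_size (here, 1 prompt instead of 2).
    print(is_warmup, len(batch))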
@@ -218,9 +242,6 @@ def run_inference(
 
     return result
 
-def write_error_to_file(filename, error_message):
-    with open(filename, 'a') as file:
-        file.write(error_message + '\n')
 
 def main(
     model_path: str,
@@ -232,7 +253,7 @@ def main(
     temperature: float = 0.7,
     repitition_penalty: float = 1.0,
     max_new_tokens: int = 512,
-
+    batch_size: int = 1,
 ) -> None:
     """Run benchmarking for one model on the entire input file.
 
@@ -262,8 +283,8 @@ def main(
         model_path = model_path[:-1]
     model_name_cleaned = "--".join(model_path.split("/")[-2:])
     output_dir = f"{output_dir}/{task}/{model_name_cleaned}"
-    output_csv_path = f"{output_dir}/benchmark_batch_{
-    config_json_path = f"{output_dir}/
+    output_csv_path = f"{output_dir}/benchmark_batch_{batch_size}.json"
+    config_json_path = f"{output_dir}/config_batch_{batch_size}.json"
     table = Table(title="Benchmark")
     table.add_column("Configuration")
     table.add_column("Value")
@@ -341,45 +362,23 @@ def main(
             "temperature": temperature,
             "repitition_penalty": repitition_penalty,
             "max_new_tokens": max_new_tokens,
-            "batch_size":
+            "batch_size": batch_size,
         },
         config_json,
         indent=4,
     )
     config_json.write("\n")
 
-    class CustomDataset(Dataset):
-        def __init__(self, data):
-            self.data = data
-
-        def __len__(self):
-            return len(self.data)
-
-        def __getitem__(self, index):
-            sample = self.data[index]
-            return sample["conversations"][0]["value"]
-
-
-    def dataloader(input_file: str, batch_size: batch) -> Generator[tuple[bool, str], None, None]:
-        """Yields a tuple of whether this is a warmup run and the input prompt."""
-        for _ in range(3):
-            yield True, ["Say something long and random. I don't care about the content." for _ in range (batch)]
-        data = json.load(open(input_file, "r"))
-        custom_dataset = CustomDataset(data)
-        data_loader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=False)
-        for prompt in data_loader:
-            yield False, prompt
-
     # Warm up the GPU with some random prompts.
     # Forward through all the prompts.
     is_first = True
     convs = []
     prompts = []
-    data_iter = iter(dataloader(input_file,
+    data_iter = iter(dataloader(input_file, batch_size))
 
     for is_warmup, input_prompts in data_iter:
         # Construct the input prompt.
-        for i in range(
+        for i in range(batch_size):
             conv = copy.deepcopy(conv_base)
             conv.append_message(conv.roles[0], input_prompts[i])
             conv.append_message(conv.roles[1], "")
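conv_base is defined outside this diff; from the roles/append_message usage it looks like a FastChat-style conversation template. A hedged sketch of what the per-element loop produces for one batch, assuming fastchat's get_conv_template (the template name below is a placeholder, not necessarily what the script uses):

# Illustrative sketch, not part of the commit.
import copy
from fastchat.conversation import get_conv_template

conv_base = get_conv_template("vicuna_v1.1")  # assumption; the script builds its own conv_base
input_prompts = ["Summarize the plot of Hamlet.", "Explain binary search."]  # toy batch

prompts = []
for i in range(len(input_prompts)):  # the committed code uses range(batch_size)
    conv = copy.deepcopy(conv_base)
    conv.append_message(conv.roles[0], input_prompts[i])
    conv.append_message(conv.roles[1], "")  # empty assistant turn marks where generation starts
    prompts.append(conv.get_prompt())

print(prompts[0])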
@@ -404,18 +403,19 @@ def main(
         if results:
             # Record numbers.
             if not is_warmup:
-
-                latency = measurements.time
-                throughput =
-                energy = measurements.total_energy
+                total_num_tokens = sum([result.response_length for result in results])  # total number of tokens
+                latency = measurements.time  # seconds, identical for all requests
+                throughput = total_num_tokens / latency  # tokens per second
+                energy = measurements.total_energy  # Joules, total across all requests
+                # Fields should be interpreted as per-request
                 output = {
                     "model": model_name_cleaned,
                     "throughput": throughput,
-                    "response_length":
+                    "response_length": total_num_tokens / batch_size,
                     "latency": latency,
-                    "energy": energy,
+                    "energy": energy / batch_size,
                     "input": [prompt.strip() for prompt in prompts],
-                    "output": [
+                    "output": [result.output.strip() for result in results],
                 }
                 output_str = json.dumps(output, indent=4)
             if not is_warmup:
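The new bookkeeping turns batch-level measurements into per-request fields. A small worked example with made-up numbers (the measurements object in the script exposes .time and .total_energy; plain floats stand in for it here):

# Illustrative numbers only, not measured results.
batch_size = 4
response_lengths = [120, 95, 130, 110]           # tokens generated per request in the batch
total_num_tokens = sum(response_lengths)         # 455 tokens across the batch
latency = 13.0                                   # seconds for the whole batched generation call
total_energy = 3250.0                            # Joules over the same window

throughput = total_num_tokens / latency          # 35.0 tokens/s for the batch as a whole
response_length = total_num_tokens / batch_size  # 113.75 tokens per request (average)
energy = total_energy / batch_size               # 812.5 J per request (average)

print(throughput, response_length, energy)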