{ "current_gpu_type": "NVIDIA L40S", "current_gpu_total_memory": 45372.6875, "memory_inference_first": 4312.0, "memory_inference": 4208.0, "token_generation_latency_sync": 46.756124877929686, "token_generation_latency_async": 46.56997695565224, "token_generation_throughput_sync": 0.02138757227231272, "token_generation_throughput_async": 0.02147306194616077, "token_generation_CO2_emissions": 3.367901542186602e-06, "token_generation_energy_consumption": 0.002321453711341601, "inference_latency_sync": 39.73529586791992, "inference_latency_async": 38.7270450592041, "inference_throughput_sync": 0.02516654219271448, "inference_throughput_async": 0.025821748043808834, "inference_CO2_emissions": 3.481497167551675e-06, "inference_energy_consumption": 1.445772659465061e-05 }