import gradio as gr
def calculate_training_metrics(
    gpu_choice, precision, num_gpus, num_parameters, dataset_tokens,
    num_epochs, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85
):
"""
Calculates both the training time and cost for LLM training with parallel computing.
Args:
- gpu_choice (str): The choice of GPU model
- precision (str): The precision level for training
- num_gpus (int): Number of GPUs for parallel computing
- num_parameters (float): Number of model parameters in billions
- dataset_tokens (float): Number of tokens in the dataset
- num_epochs (int): Number of training epochs
- utilization_rate (float): GPU utilization rate (0 < rate ≤ 1)
- overhead (float): Overhead multiplier for additional costs
- cost_per_gpu_hour (float): Cost per GPU hour in dollars
Returns:
- tuple: (total_cost, training_days, training_hours)
"""
    # GPU throughputs in FLOPS (operations per second)
    gpu_throughputs = {
        'A100 80GB PCIe': {'bf16': 312e12, 'tf32': 156e12},
        'A100 80GB SXM': {'bf16': 624e12, 'tf32': 312e12},
        'V100': {'tensor': 130e12},
        'H100 SXM': {'bf16': 1979e12, 'tf32': 989e12},
        'H100 PCIe': {'bf16': 1513e12, 'tf32': 756e12}
    }
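    # Note (assumption): these look like vendor-quoted peak tensor-core figures; some
    # of them (e.g. the A100 SXM and H100 numbers) match NVIDIA's 2:4 structured-sparsity
    # ratings, so sustained dense-training throughput is usually lower. The
    # utilization_rate parameter is what absorbs that gap in practice.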
    # Get the base GPU throughput for the selected precision; not every precision
    # is defined for every GPU (e.g. 'tensor' only exists for the V100 entry)
    if precision not in gpu_throughputs[gpu_choice]:
        raise gr.Error(f"Precision '{precision}' is not available for {gpu_choice}.")
    base_throughput = gpu_throughputs[gpu_choice][precision]

    # Calculate effective throughput with multiple GPUs,
    # assuming 90% scaling efficiency for parallel computing
    parallel_efficiency = 0.9
    effective_throughput = base_throughput * num_gpus * parallel_efficiency

    # Calculate total tokens processed (dataset_tokens * epochs)
    total_tokens = dataset_tokens * num_epochs

    # Calculate total FLOPs needed (~6 FLOPs per parameter per token:
    # roughly 2 for the forward pass and 4 for the backward pass)
    total_flops = 6 * num_parameters * total_tokens

    # Calculate raw computation hours needed
    compute_hours = total_flops / (effective_throughput * 3600)

    # Adjust for utilization rate and overhead
    actual_hours = (compute_hours / utilization_rate) * overhead

    # Calculate days and remaining hours
    training_days = int(actual_hours // 24)
    training_hours = actual_hours % 24

    # Calculate total cost (cost per GPU hour * number of GPUs * hours)
    total_cost = actual_hours * cost_per_gpu_hour * num_gpus

    return total_cost, training_days, training_hours
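
# Illustrative sanity check (assumed example values, not a benchmark): training a
# 7B-parameter model on 100B tokens for one epoch on 8x A100 80GB SXM in bf16 needs
# about 6 * 7e9 * 100e9 = 4.2e21 FLOPs. At 8 * 624e12 * 0.9 ≈ 4.5e15 FLOPS that is
# ~260 compute-hours, or ~571 wall-clock hours (~24 days) after dividing by a 0.5
# utilization rate and applying the 1.10 overhead, i.e. roughly $8,450 at $1.85 per
# GPU-hour:
#   calculate_training_metrics('A100 80GB SXM', 'bf16', 8, 7e9, 100e9, 1)
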
def gradio_interface(
    gpu_choice, precision, num_gpus, num_parameters, dataset_tokens,
    num_epochs, utilization_rate, overhead, cost_per_gpu_hour
):
    # Convert inputs to appropriate numeric types
    num_parameters = float(num_parameters) * 1e9  # Convert billions to actual number
    dataset_tokens = float(dataset_tokens) * 1e9  # Convert billions to actual number
    num_gpus = int(num_gpus)
    num_epochs = int(num_epochs)
    utilization_rate = float(utilization_rate)
    overhead = float(overhead)
    cost_per_gpu_hour = float(cost_per_gpu_hour)

    # Calculate metrics
    cost, days, hours = calculate_training_metrics(
        gpu_choice, precision, num_gpus, num_parameters, dataset_tokens,
        num_epochs, utilization_rate, overhead, cost_per_gpu_hour
    )
    # Format the output messages
    time_msg = f"{days} days and {hours:.1f} hours"
    cost_msg = f"${cost:,.2f}"
    return time_msg, cost_msg
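
# Example (illustrative, assumed inputs): gradio_interface('H100 SXM', 'bf16', 64, 70,
# 1000, 1, 0.5, 1.10, 1.85) estimates one epoch over 1T tokens for a 70B-parameter
# model on 64 H100 SXM GPUs; with these values it reports roughly 94 days of training
# at a cost on the order of $267,000. Note that this wrapper takes parameters and
# tokens in billions, whereas calculate_training_metrics expects absolute counts.
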
# Define the available GPU choices for the dropdown
gpu_choices = ["A100 80GB PCIe", "A100 80GB SXM", "V100", "H100 SXM", "H100 PCIe"]
# Create the Gradio interface
title = "<h2 style='text-align: center;'>LLM Training Time and Cost Calculator</h2>"
description = """
<p style='text-align: center;'>Calculate both the training time and the cost of training large language models (LLMs) with parallel computing support.</p>
<p><strong>Input Parameters:</strong></p>
<ul>
<li><strong>GPU Selection:</strong> Choose from various GPU models with different compute capabilities</li>
<li><strong>Number of GPUs:</strong> Specify how many GPUs to use in parallel</li>
<li><strong>Model Size:</strong> Number of parameters in billions</li>
<li><strong>Dataset Size:</strong> Number of tokens in your dataset in billions</li>
<li><strong>Training Epochs:</strong> Number of times to iterate over the dataset</li>
<li><strong>Utilization Rate:</strong> Expected GPU utilization (typically 0.4-0.7)</li>
<li><strong>Overhead:</strong> Additional time/cost factor for data loading, checkpointing, etc.</li>
</ul>
<p><strong>Outputs:</strong></p>
<ul>
<li><strong>Estimated Training Time:</strong> Total days and hours required for training</li>
<li><strong>Estimated Training Cost:</strong> Total cost in dollars based on GPU hours</li>
</ul>
Modified from <a href="https://huggingface.co/spaces/Heng666/LLM-Training-Cost-Calculator">this Hugging Face Space</a>.
"""
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Dropdown(choices=gpu_choices, label="Select GPU", value='A100 80GB PCIe'),
        gr.Dropdown(choices=['bf16', 'tf32', 'tensor'], label="Select Precision", value='bf16'),
        gr.Number(label="Number of GPUs", value=1, minimum=1, maximum=1024),
        gr.Number(label="Number of Parameters (billions)", value=70),
        gr.Number(label="Dataset Tokens (billions)", value=1),
        gr.Number(label="Number of Epochs", value=3, minimum=1),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
        gr.Slider(minimum=1.0, maximum=2.0, step=0.01, value=1.10, label="Overhead Factor"),
        gr.Number(label="Cost per GPU Hour ($)", value=1.85)
    ],
    outputs=[
        gr.Textbox(label="Estimated Training Time:"),
        gr.Textbox(label="Estimated Training Cost:")
    ],
    title=title,
    description=description,
    article="<p style='text-align: center;'>Improved with good intentions by ghost.</p>"
)
if __name__ == "__main__":
    iface.launch()