"""
Collect data from the multiple sources and create a base datafranme for the LLMCalculator table
Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency
Pricing - pricing.json
Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json
"""
import io
import json
import posixpath

import pandas as pd
import requests

from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX


def validate_request(url: str, response) -> bool:
    """
    Validate that an HTTP request was successful.

    Args:
        url (str): The URL that was requested
        response (requests.Response): The response object from the request

    Returns:
        bool: True if the request was successful (status code 200), False otherwise
    """
    if response.status_code != 200:
        print(f"Failed to read file - {url}. Status Code: {response.status_code}")
        return False
    return True
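

# Illustrative usage sketch for validate_request; the URL below is a
# hypothetical placeholder, not one of the real endpoints used in this module:
#     resp = requests.get("https://example.com/data.csv")
#     if validate_request("https://example.com/data.csv", resp):
#         df = pd.read_csv(io.StringIO(resp.text))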


def fetch_benchmark_data(benchmark: str = "text", version_names: list = None) -> tuple:
    """
    Fetch and parse benchmark results and latency data from CSV files.

    Args:
        benchmark (str): Type of benchmark to fetch ('text' or 'multimodal')
        version_names (list): List of version names to search through, sorted by latest first

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing:
            - results_df: DataFrame with benchmark results
            - latency_df: DataFrame with latency measurements
        Returns (None, None) if no matching version is found or all requests fail

    Note:
        requests.RequestException, pd.errors.EmptyDataError and pd.errors.ParserError
        are caught and logged here; they do not propagate to the caller.
    """
    if version_names is None:
        version_names = []

    for v in version_names:
        # Skip versions that do not match the requested benchmark type
        is_multimodal = 'multimodal' in v
        if (benchmark == "multimodal") != is_multimodal:
            continue

        # Construct raw-file URLs; posixpath.join keeps forward slashes on every OS
        results_url = posixpath.join(CLEMBENCH_RUNS_REPO, v, RESULT_FILE)
        latency_url = posixpath.join(CLEMBENCH_RUNS_REPO, LATENCY_FOLDER, v + LATENCY_SUFFIX)

        try:
            results = requests.get(results_url)
            latency = requests.get(latency_url)
            if validate_request(results_url, results) and validate_request(latency_url, latency):
                # Convert the CSV content to pandas DataFrames
                results_df = pd.read_csv(io.StringIO(results.text))
                latency_df = pd.read_csv(io.StringIO(latency.text))
                return results_df, latency_df
        except requests.RequestException as e:
            print(f"Error fetching data for version {v}: {e}")
        except pd.errors.EmptyDataError:
            print(f"Error: Empty CSV file found for version {v}")
        except pd.errors.ParserError:
            print(f"Error: Unable to parse CSV data for version {v}")

    return None, None
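

# Hedged usage sketch for fetch_benchmark_data. The version names below are
# hypothetical examples of the scheme the code expects ("v<major>.<minor>",
# optionally suffixed with "_multimodal"); real names come from the benchmark
# metadata fetched in fetch_version_metadata below:
#     results_df, latency_df = fetch_benchmark_data("text", ["v1.6", "v1.5"])
#     if results_df is not None:
#         print(results_df.head())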


def fetch_version_metadata() -> tuple:
    """
    Fetch and process benchmark metadata from the Clembench GitHub repository.

    The data is sourced from: https://github.com/clembench/clembench-runs
    Configure the repository path in src/assets/text_content/CLEMBENCH_RUNS_REPO

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing:
            - mm_latency: Multimodal latency data
            - mm_result: Multimodal benchmark results
            - text_latency: Text latency data
            - text_result: Text benchmark results
        Returns (None, None, None, None) if the request fails
    """
    json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE
    response = requests.get(json_url)

    # Check if the JSON file request was successful
    if not validate_request(json_url, response):
        return None, None, None, None

    json_data = response.json()
    versions = json_data['versions']

    # Sort the benchmark versions, latest first
    version_names = sorted(
        [ver['version'] for ver in versions],
        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
        reverse=True
    )
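
    # How the sort key parses a version string, using the hypothetical name
    # "v1.6_multimodal" as an example of the expected scheme:
    #     "v1.6_multimodal"[1:]        -> "1.6_multimodal"
    #     .split('_')[0]               -> "1.6"
    #     map(int, ... .split('.'))    -> [1, 6]
    # Comparing integer lists makes e.g. v1.10 sort above v1.9.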

    # Latency values are reported in seconds
    mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names)
    text_result, text_latency = fetch_benchmark_data("text", version_names)

    return mm_latency, mm_result, text_latency, text_result
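

# Hedged usage sketch for fetch_version_metadata. Note that the function
# returns latency before results for each track (see the return statement above):
#     mm_latency, mm_result, text_latency, text_result = fetch_version_metadata()
#     if text_result is not None:
#         print(text_result.head())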


def fetch_registry_data() -> list:
    """
    Fetch and parse model registry data from the Clembench registry URL.

    The data is sourced from the model registry defined in REGISTRY_URL and
    contains information about various LLM models, including their
    specifications and capabilities.

    Returns:
        list: Parsed model registry data (a list of model entries).
        Returns None if the request fails or the JSON is invalid.

    Note:
        requests.RequestException and json.JSONDecodeError are caught and
        logged here; they do not propagate to the caller.
    """
    try:
        response = requests.get(REGISTRY_URL)
        if not validate_request(REGISTRY_URL, response):
            return None
        return response.json()
    except requests.RequestException as e:
        print(f"Error fetching registry data: {e}")
    except json.JSONDecodeError as e:
        print(f"Error parsing registry JSON: {e}")
    return None


if __name__ == "__main__":
    fetch_version_metadata()
    registry_data = fetch_registry_data()
    # Guard against a failed fetch: fetch_registry_data returns None on error
    if registry_data:
        print(registry_data[0])