File size: 5,713 Bytes
f12cdfe
 
 
 
 
 
 
 
 
 
ee6a180
 
f12cdfe
ee6a180
 
 
 
 
 
 
 
 
 
 
f12cdfe
ee6a180
 
 
 
f12cdfe
ee6a180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""
Collect data from the multiple sources and create a base datafranme for the LLMCalculator table
Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency
Pricing - pricing.json
Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json
"""

import io
import json
import os
import posixpath

import pandas as pd
import requests

from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX

def validate_request(url: str, response) -> bool:
    """
    Check whether an HTTP response indicates success.

    Args:
        url (str): The URL that was requested (used only in the error message)
        response (requests.Response): The response object to inspect

    Returns:
        bool: True when the status code is 200, False otherwise
    """
    ok = response.status_code == 200
    if not ok:
        print(f"Failed to read file - {url}. Status Code: {response.status_code}")
    return ok

def fetch_benchmark_data(benchmark: str = "text", version_names: list = []) -> tuple:
    """
    Fetch and parse benchmark results and latency data from CSV files.
    
    Args:
        benchmark (str): Type of benchmark to fetch ('text' or 'multimodal')
        version_names (list): List of version names to search through, sorted by latest first
        
    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing:
            - results_df: DataFrame with benchmark results
            - latency_df: DataFrame with latency measurements
            Returns (None, None) if no matching version is found or requests fail
            
    Raises:
        requests.RequestException: If there's an error fetching the data
        pd.errors.EmptyDataError: If CSV file is empty
        pd.errors.ParserError: If CSV parsing fails
    """
    for v in version_names:
        # Check if version matches benchmark type
        is_multimodal = 'multimodal' in v
        if (benchmark == "multimodal") != is_multimodal:
            continue
            
        # Construct URLs
        results_url = os.path.join(CLEMBENCH_RUNS_REPO, v, RESULT_FILE)
        latency_url = os.path.join(CLEMBENCH_RUNS_REPO, LATENCY_FOLDER, v + LATENCY_SUFFIX)
        
        try:
            results = requests.get(results_url)
            latency = requests.get(latency_url)
            
            if validate_request(results_url, results) and validate_request(latency_url, latency):
                # Convert the CSV content to pandas DataFrames
                results_df = pd.read_csv(pd.io.common.StringIO(results.text))
                latency_df = pd.read_csv(pd.io.common.StringIO(latency.text))
                return results_df, latency_df
                
        except requests.RequestException as e:
            print(f"Error fetching data for version {v}: {e}")
        except pd.errors.EmptyDataError:
            print(f"Error: Empty CSV file found for version {v}")
        except pd.errors.ParserError:
            print(f"Error: Unable to parse CSV data for version {v}")
            
    return None, None

def fetch_version_metadata() -> tuple:
    """
    Fetch benchmark metadata and load the latest text and multimodal runs.

    The data is sourced from: https://github.com/clembench/clembench-runs
    Configure the repository path in src/assets/text_content/CLEMBENCH_RUNS_REPO

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]:
            (mm_latency, mm_result, text_latency, text_result); latency
            values are measured in seconds. Returns (None, None, None, None)
            if the benchmark index itself cannot be retrieved.
    """
    json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE
    response = requests.get(json_url)

    # Bail out early if the benchmark index is unreachable.
    if not validate_request(json_url, response):
        return None, None, None, None

    versions = response.json()['versions']

    # Parse "vX.Y[_suffix]" into a list of ints so versions sort
    # numerically, newest first.
    def version_key(name):
        return [int(part) for part in name[1:].split('_')[0].split('.')]

    version_names = sorted(
        (entry['version'] for entry in versions),
        key=version_key,
        reverse=True,
    )

    mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names)
    text_result, text_latency = fetch_benchmark_data("text", version_names)

    return mm_latency, mm_result, text_latency, text_result

def fetch_registry_data() -> dict:
    """
    Download and decode the model registry JSON from REGISTRY_URL.

    The registry contains information about the available LLM models,
    including their specifications and capabilities.

    Returns:
        dict: Parsed model registry data, or None when the request fails
        or the payload cannot be decoded as JSON.
    """
    try:
        response = requests.get(REGISTRY_URL)
        if validate_request(REGISTRY_URL, response):
            return response.json()
    except requests.RequestException as e:
        print(f"Error fetching registry data: {e}")
    except json.JSONDecodeError as e:
        print(f"Error parsing registry JSON: {e}")

    return None

if __name__=="__main__":
    fetch_version_metadata()
    registry_data = fetch_registry_data()
    print(registry_data[0])