"""
Collect data from the multiple sources and create a base datafranme for the LLMCalculator table
Latency - https://github.com/clembench/clembench-runs/tree/main/Addenda/Latency
Pricing - pricing.json
Model info - https://github.com/kushal-10/clembench/blob/feat/registry/backends/model_registry_updated.json
"""
import io
import json
import posixpath

import pandas as pd
import requests

from assets.text_content import CLEMBENCH_RUNS_REPO, REGISTRY_URL, BENCHMARK_FILE, LATENCY_FOLDER, RESULT_FILE, LATENCY_SUFFIX


def validate_request(url: str, response) -> bool:
    """
    Validate that an HTTP request was successful.

    Args:
        url (str): The URL that was requested
        response (requests.Response): The response object from the request

    Returns:
        bool: True if the request was successful (status code 200), False otherwise
    """
    if response.status_code != 200:
        print(f"Failed to read file - {url}. Status Code: {response.status_code}")
        return False
    return True
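

# Illustrative usage sketch for validate_request; the URL below is a
# hypothetical placeholder, not one of the real endpoints used in this module:
#     resp = requests.get("https://example.com/data.csv")
#     if validate_request("https://example.com/data.csv", resp):
#         df = pd.read_csv(io.StringIO(resp.text))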


def fetch_benchmark_data(benchmark: str = "text", version_names: list = None) -> tuple:
    """
    Fetch and parse benchmark results and latency data from CSV files.

    Args:
        benchmark (str): Type of benchmark to fetch ('text' or 'multimodal')
        version_names (list): List of version names to search through, sorted by latest first

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]: A tuple containing:
            - results_df: DataFrame with benchmark results
            - latency_df: DataFrame with latency measurements
        Returns (None, None) if no matching version is found or all requests fail

    Note:
        requests.RequestException, pd.errors.EmptyDataError and pd.errors.ParserError
        are caught and logged here; they do not propagate to the caller.
    """
    if version_names is None:
        version_names = []

    for v in version_names:
        # Skip versions that do not match the requested benchmark type
        is_multimodal = 'multimodal' in v
        if (benchmark == "multimodal") != is_multimodal:
            continue

        # Construct raw-file URLs; posixpath.join keeps forward slashes on every OS
        results_url = posixpath.join(CLEMBENCH_RUNS_REPO, v, RESULT_FILE)
        latency_url = posixpath.join(CLEMBENCH_RUNS_REPO, LATENCY_FOLDER, v + LATENCY_SUFFIX)

        try:
            results = requests.get(results_url)
            latency = requests.get(latency_url)
            if validate_request(results_url, results) and validate_request(latency_url, latency):
                # Convert the CSV content to pandas DataFrames
                results_df = pd.read_csv(io.StringIO(results.text))
                latency_df = pd.read_csv(io.StringIO(latency.text))
                return results_df, latency_df
        except requests.RequestException as e:
            print(f"Error fetching data for version {v}: {e}")
        except pd.errors.EmptyDataError:
            print(f"Error: Empty CSV file found for version {v}")
        except pd.errors.ParserError:
            print(f"Error: Unable to parse CSV data for version {v}")

    return None, None
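

# Hedged usage sketch for fetch_benchmark_data. The version names below are
# hypothetical examples of the scheme the code expects ("v<major>.<minor>",
# optionally suffixed with "_multimodal"); real names come from the benchmark
# metadata fetched in fetch_version_metadata below:
#     results_df, latency_df = fetch_benchmark_data("text", ["v1.6", "v1.5"])
#     if results_df is not None:
#         print(results_df.head())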


def fetch_version_metadata() -> tuple:
    """
    Fetch and process benchmark metadata from the Clembench GitHub repository.

    The data is sourced from: https://github.com/clembench/clembench-runs
    Configure the repository path in src/assets/text_content/CLEMBENCH_RUNS_REPO

    Returns:
        tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, pd.DataFrame]: A tuple containing:
            - mm_latency: Multimodal latency data
            - mm_result: Multimodal benchmark results
            - text_latency: Text latency data
            - text_result: Text benchmark results
        Returns (None, None, None, None) if the request fails
    """
    json_url = CLEMBENCH_RUNS_REPO + BENCHMARK_FILE
    response = requests.get(json_url)

    # Check if the JSON file request was successful
    if not validate_request(json_url, response):
        return None, None, None, None

    json_data = response.json()
    versions = json_data['versions']

    # Sort the benchmark versions, latest first
    version_names = sorted(
        [ver['version'] for ver in versions],
        key=lambda v: list(map(int, v[1:].split('_')[0].split('.'))),
        reverse=True
    )
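
    # How the sort key parses a version string, using the hypothetical name
    # "v1.6_multimodal" as an example of the expected scheme:
    #     "v1.6_multimodal"[1:]        -> "1.6_multimodal"
    #     .split('_')[0]               -> "1.6"
    #     map(int, ... .split('.'))    -> [1, 6]
    # Comparing integer lists makes e.g. v1.10 sort above v1.9.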

    # Latency values are reported in seconds
    mm_result, mm_latency = fetch_benchmark_data("multimodal", version_names)
    text_result, text_latency = fetch_benchmark_data("text", version_names)

    return mm_latency, mm_result, text_latency, text_result
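

# Hedged usage sketch for fetch_version_metadata. Note that the function
# returns latency before results for each track (see the return statement above):
#     mm_latency, mm_result, text_latency, text_result = fetch_version_metadata()
#     if text_result is not None:
#         print(text_result.head())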


def fetch_registry_data() -> list:
    """
    Fetch and parse model registry data from the Clembench registry URL.

    The data is sourced from the model registry defined in REGISTRY_URL and
    contains information about various LLM models, including their
    specifications and capabilities.

    Returns:
        list: Parsed model registry data (a list of model entries).
        Returns None if the request fails or the JSON is invalid.

    Note:
        requests.RequestException and json.JSONDecodeError are caught and
        logged here; they do not propagate to the caller.
    """
    try:
        response = requests.get(REGISTRY_URL)
        if not validate_request(REGISTRY_URL, response):
            return None
        return response.json()
    except requests.RequestException as e:
        print(f"Error fetching registry data: {e}")
    except json.JSONDecodeError as e:
        print(f"Error parsing registry JSON: {e}")
    return None


if __name__ == "__main__":
    fetch_version_metadata()
    registry_data = fetch_registry_data()
    # Guard against a failed fetch: fetch_registry_data returns None on error
    if registry_data:
        print(registry_data[0])