# Source: Hugging Face Space bardsai/performance-llm-board

import os
import re
from dataclasses import dataclass
from typing import Optional


@dataclass(unsafe_hash=True)
class Model:
    """Catalogue entry describing one LLM and how it is billed.

    The first four fields are required and are passed positionally by the
    catalogue; everything else is optional metadata.

    NOTE: ``unsafe_hash=True`` derives ``__hash__`` from the field values, so
    mutating an instance after it has been put in a set or used as a dict key
    changes its hash.
    """

    model_display_name: str  # human-readable label
    model_name: str  # provider-qualified identifier, e.g. "together_ai/<org>/<model>"
    api_url: Optional[str]  # custom endpoint URL, or None
    provider: str
    hourly_cost: Optional[float] = None  # dollars per hour
    cost_description: Optional[str] = None  # free-form text; overrides the computed `cost`
    supports_functions: bool = False
    size_billion_parameters: Optional[float] = None
    cost_per_million_tokens: Optional[float] = None  # single price for input and output
    cost_per_million_input_tokens: Optional[float] = None
    cost_per_million_output_tokens: Optional[float] = None
    context_length: Optional[int] = None
    selected: bool = False

    def __post_init__(self):
        """Fill unset input/output prices from the combined per-token price.

        Only ``None`` counts as "unset": an explicit ``0`` (free tier) is kept
        instead of being replaced by the combined price.
        """
        if self.cost_per_million_input_tokens is None:
            self.cost_per_million_input_tokens = self.cost_per_million_tokens
        if self.cost_per_million_output_tokens is None:
            self.cost_per_million_output_tokens = self.cost_per_million_tokens

    @property
    def cost(self) -> Optional[str]:
        """Return a human-readable price string, or ``None`` if no price is known.

        Precedence: explicit ``cost_description``, then ``hourly_cost``, then
        the combined per-token price, then the separate input/output prices.
        Numbers are rendered with two significant digits (``.2g``), which
        switches to exponent notation for values of 100 or more.
        """
        if self.cost_description:
            return self.cost_description
        # Compare against None rather than relying on truthiness so that a
        # price of 0 is still reported.
        if self.hourly_cost is not None:
            return f"${self.hourly_cost:.2g} / hour"
        if self.cost_per_million_tokens is not None:
            return f"${self.cost_per_million_tokens:.2g} / 1M tokens"
        if (
            self.cost_per_million_input_tokens is not None
            and self.cost_per_million_output_tokens is not None
        ):
            return f"${self.cost_per_million_input_tokens:.2g} / 1M input tokens, ${self.cost_per_million_output_tokens:.2g} / 1M output tokens"
        return None


# Shorthand for reading endpoint URLs from the environment; returns None when
# the variable is not set, in which case the entry's api_url is None.
env = os.environ.get

# Catalogue of models shown on the board. Positional arguments are
# (model_display_name, model_name, api_url, provider).
# Several models appear twice under different display names (e.g.
# "llama-2-70b-chat" / "LLaMA-2 Chat (70B)"); they share the same model_name.
# For "Together AI" entries whose display name contains "(<N>B)", the
# post-processing loop at the bottom of this file overwrites
# size_billion_parameters and cost_per_million_tokens from that number.
MODELS = [
    # source: https://openai.com/pricing
    # converted costs from dollar/1K tokens to dollar/1M for readability and together_ai comparability
    Model(
        "gpt-3.5-turbo",
        "gpt-3.5-turbo",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=1,
        cost_per_million_output_tokens=2,
        # https://learn.microsoft.com/en-us/answers/questions/1356487/what-is-the-exact-maximum-input-tokens-of-azure-gp
        context_length=4096,
    ),
    Model(
        "gpt-4-turbo",
        "gpt-4-1106-preview",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=10,
        cost_per_million_output_tokens=30,
        # https://writesonic.com/blog/gpt-4-turbo-vs-gpt-4
        context_length=128_000,
    ),
    Model(
        "gpt-4",
        "gpt-4",
        None,
        "OpenAI",
        supports_functions=True,
        cost_per_million_input_tokens=30,
        cost_per_million_output_tokens=60,
        # The base "gpt-4" model (priced at $30/$60 per 1M tokens) has an
        # 8K context window; 32K belongs to the separate "gpt-4-32k" model.
        context_length=8192,
    ),
    # source: https://www.together.ai/pricing
    Model(
        "llama-2-70b-chat",
        "together_ai/togethercomputer/llama-2-70b-chat",
        None,
        "Together AI",
        cost_per_million_tokens=0.9,
        size_billion_parameters=70,
        # Llama 2 has a 4096-token context window, matching the
        # "LLaMA-2 Chat (70B)" entry below for the same model_name.
        # (The previously cited 2048 came from
        # https://github.com/facebookresearch/llama/issues/148, which appears
        # to predate Llama 2.)
        context_length=4096,
    ),
    Model(
        "Mixtral-8x7B-Instruct-v0.1",
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,
        cost_per_million_tokens=0.9,
    ),
    # taken from endpoint pages
    Model(
        "zephyr-7b-beta",
        "huggingface/HuggingFaceH4/zephyr-7b-beta",
        env("ZEPHYR_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
    ),
    Model(
        "Mistral-7B-Instruct-v0.2",
        "huggingface/mistralai/Mistral-7B-Instruct-v0.2",
        env("MISTRAL_7B_BETA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=1.30,
        size_billion_parameters=7,
        selected=True,
    ),
    Model(
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        "huggingface/TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        env("TINY_LLAMA_URL"),
        "Hugging Face Inference Endpoint",
        hourly_cost=0.60,
        size_billion_parameters=1.1,
    ),
    Model(
        "gemini-pro",
        "gemini-pro",
        None,
        "Google VertexAI",
        # https://ai.google.dev/pricing
        # NOTE(review): the description quotes prices per 1M *characters*,
        # while the numeric fields below are named per 1M *tokens* - confirm
        # how consumers of these fields interpret them.
        cost_description="$0.25 / 1M input characters, $0.5 / 1M output characters (60 queries per minute are free)",
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    Model(
        "chat-bison (PaLM 2)",
        "chat-bison",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
        # https://ai.google.dev/models/palm
        # 8192 (2**13); the previous value 8196 was a typo.
        context_length=8192,
    ),
    Model(
        "chat-bison-32k (PaLM 2 32K)",
        "chat-bison-32k",
        None,
        "Google VertexAI",
        # https://cloud.google.com/vertex-ai/docs/generative-ai/pricing
        cost_per_million_input_tokens=0.25,
        cost_per_million_output_tokens=0.5,
    ),
    # new models:
    Model(
        "01-ai Yi Chat (34B)",
        "together_ai/zero-one-ai/Yi-34B-Chat",
        None,
        "Together AI",
        context_length=4096,
        # selected=True,
    ),
    Model(
        "Chronos Hermes (13B)",
        "together_ai/Austism/chronos-hermes-13b",
        None,
        "Together AI",
        context_length=2048,
        # selected=True,
    ),
    Model(
        "Deepseek Coder Instruct (33B)",
        "together_ai/deepseek-ai/deepseek-coder-33b-instruct",
        None,
        "Together AI",
        context_length=16384,
    ),
    Model(
        "Platypus2 Instruct (70B)",
        "together_ai/garage-bAInd/Platypus2-70B-instruct",
        None,
        "Together AI",
        context_length=4096,
    ),
    Model(
        "MythoMax-L2 (13B)",
        "together_ai/Gryphe/MythoMax-L2-13b",
        None,
        "Together AI",
        context_length=4096,
    ),
    Model(
        "Vicuna v1.5 (13B)",
        "together_ai/lmsys/vicuna-13b-v1.5",
        None,
        "Together AI",
        context_length=4096,
    ),
    Model(
        "Vicuna v1.5 (7B)",
        "together_ai/lmsys/vicuna-7b-v1.5",
        None,
        "Together AI",
        context_length=4096,
        size_billion_parameters=7,
        # selected=True
    ),
    Model(
        "Code Llama Instruct (13B)",
        "together_ai/codellama/CodeLlama-13b-Instruct-hf",
        None,
        "Together AI",
        context_length=16384,
    ),
    Model(
        "Code Llama Instruct (34B)",
        "together_ai/codellama/CodeLlama-34b-Instruct-hf",
        None,
        "Together AI",
        context_length=16384,
    ),
    Model(
        "Code Llama Instruct (70B)",
        "together_ai/codellama/CodeLlama-70b-Instruct-hf",
        None,
        "Together AI",
        context_length=4096,
    ),
    Model(
        "Code Llama Instruct (7B)",
        "together_ai/codellama/CodeLlama-7b-Instruct-hf",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=16384,
    ),
    Model(
        "LLaMA-2 Chat (13B)",
        "together_ai/togethercomputer/llama-2-13b-chat",
        None,
        "Together AI",
        size_billion_parameters=13,
        context_length=4096,
    ),
    Model(
        "LLaMA-2 Chat (70B)",
        "together_ai/togethercomputer/llama-2-70b-chat",
        None,
        "Together AI",
        size_billion_parameters=70,
        context_length=4096,
    ),
    Model(
        "LLaMA-2 Chat (7B)",
        "together_ai/togethercomputer/llama-2-7b-chat",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=4096,
        # selected=True,
    ),
    Model(
        "Mistral (7B) Instruct",
        "together_ai/mistralai/Mistral-7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=4096,
    ),
    Model(
        "Mistral (7B) Instruct v0.2 (Together AI)",
        "together_ai/mistralai/Mistral-7B-Instruct-v0.2",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=32768,
        selected=True,
    ),
    Model(
        "Mixtral-8x7B Instruct (46.7B)",
        "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,
        context_length=32768,
    ),
    Model(
        "Nous Capybara v1.9 (7B)",
        "together_ai/NousResearch/Nous-Capybara-7B-V1p9",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=8192,
    ),
    Model(
        "Nous Hermes 2 - Mixtral 8x7B-DPO (46.7B)",
        "together_ai/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,
        context_length=32768,
    ),
    Model(
        "Nous Hermes 2 - Mixtral 8x7B-SFT (46.7B)",
        "together_ai/NousResearch/Nous-Hermes-2-Mixtral-8x7B-SFT",
        None,
        "Together AI",
        size_billion_parameters=8 * 7,
        context_length=32768,
    ),
    Model(
        "Nous Hermes LLaMA-2 (7B)",
        "together_ai/NousResearch/Nous-Hermes-llama-2-7b",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=4096,
    ),
    Model(
        "Nous Hermes Llama-2 (13B)",
        "together_ai/NousResearch/Nous-Hermes-Llama2-13b",
        None,
        "Together AI",
        size_billion_parameters=13,
        context_length=4096,
    ),
    Model(
        "Nous Hermes-2 Yi (34B)",
        "together_ai/NousResearch/Nous-Hermes-2-Yi-34B",
        None,
        "Together AI",
        size_billion_parameters=34,
        context_length=4096,
    ),
    Model(
        "OpenChat 3.5 (7B)",
        "together_ai/openchat/openchat-3.5-1210",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=8192,
    ),
    Model(
        "OpenOrca Mistral (7B) 8K",
        "together_ai/Open-Orca/Mistral-7B-OpenOrca",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=8192,
    ),
    Model(
        "Qwen-Chat (7B)",
        "together_ai/togethercomputer/Qwen-7B-Chat",
        None,
        "Together AI",
        size_billion_parameters=7,
        context_length=8192,
    ),
    Model(
        "Qwen 1.5 Chat (0.5B)",
        "together_ai/Qwen/Qwen1.5-0.5B-Chat",
        None,
        "Together AI",
        size_billion_parameters=0.5,
        context_length=32768,
    ),
    Model(
        "Qwen 1.5 Chat (1.8B)",
        "together_ai/Qwen/Qwen1.5-1.8B-Chat",
        None,
        "Together AI",
        context_length=32768,
        size_billion_parameters=1.8,
    ),
    Model(
        "Qwen 1.5 Chat (4B)",
        "together_ai/Qwen/Qwen1.5-4B-Chat",
        None,
        "Together AI",
        size_billion_parameters=4,
        context_length=32768,
    ),
    Model(
        "Qwen 1.5 Chat (7B)",
        "together_ai/Qwen/Qwen1.5-7B-Chat",
        None,
        "Together AI",
        context_length=32768,
        size_billion_parameters=7,
        # selected=True
    ),
    Model(
        "Qwen 1.5 Chat (14B)",
        "together_ai/Qwen/Qwen1.5-14B-Chat",
        None,
        "Together AI",
        size_billion_parameters=14,
        context_length=32768,
    ),
    Model(
        "Qwen 1.5 Chat (72B)",
        "together_ai/Qwen/Qwen1.5-72B-Chat",
        None,
        "Together AI",
        context_length=4096,
    ),
    Model(
        "Snorkel Mistral PairRM DPO (7B)",
        "together_ai/snorkelai/Snorkel-Mistral-PairRM-DPO",
        None,
        "Together AI",
        context_length=32768,
        # selected=True,
    ),
    Model(
        "Alpaca (7B)",
        "together_ai/togethercomputer/alpaca-7b",
        None,
        "Together AI",
        context_length=2048,
    ),
    Model(
        "OpenHermes-2-Mistral (7B)",
        # Added the "together_ai/" prefix so this entry is consistent with
        # every other Together AI model_name in this list.
        "together_ai/teknium/OpenHermes-2-Mistral-7B",
        None,
        "Together AI",
        context_length=8192,
    ),
    Model(
        "OpenHermes-2.5-Mistral (7B)",
        "together_ai/teknium/OpenHermes-2p5-Mistral-7B",
        None,
        "Together AI",
        context_length=8192,
        # selected=True,
    ),
    Model(
        "Falcon Instruct (40B)",
        "together_ai/togethercomputer/falcon-40b-instruct",
        None,
        "Together AI",
        context_length=2048,
    ),
    Model(
        "Falcon Instruct (7B)",
        "together_ai/togethercomputer/falcon-7b-instruct",
        None,
        "Together AI",
        context_length=2048,
        # selected=True,
    ),
    Model(
        "LLaMA-2-7B-32K-Instruct (7B)",
        "together_ai/togethercomputer/Llama-2-7B-32K-Instruct",
        None,
        "Together AI",
        context_length=32768,
    ),
    Model(
        "RedPajama-INCITE Chat (3B)",
        "together_ai/togethercomputer/RedPajama-INCITE-Chat-3B-v1",
        None,
        "Together AI",
        size_billion_parameters=3,
        context_length=2048,
    ),
    Model(
        "RedPajama-INCITE Chat (7B)",
        "together_ai/togethercomputer/RedPajama-INCITE-7B-Chat",
        None,
        "Together AI",
        context_length=2048,
        size_billion_parameters=7,
        # selected=True,
    ),
    Model(
        "StripedHyena Nous (7B)",
        "together_ai/togethercomputer/StripedHyena-Nous-7B",
        None,
        "Together AI",
        context_length=32768,
        size_billion_parameters=7,
    ),
    Model(
        "ReMM SLERP L2 (13B)",
        "together_ai/Undi95/ReMM-SLERP-L2-13B",
        None,
        "Together AI",
        context_length=4096,
        size_billion_parameters=13,
    ),
    Model(
        "Toppy M (7B)",
        "together_ai/Undi95/Toppy-M-7B",
        None,
        "Together AI",
        context_length=4096,
        size_billion_parameters=7,
    ),
    Model(
        "WizardLM v1.2 (13B)",
        "together_ai/WizardLM/WizardLM-13B-V1.2",
        None,
        "Together AI",
        context_length=4096,
        size_billion_parameters=13,
        # selected=True,
    ),
    Model(
        "Upstage SOLAR Instruct v1 (11B)",
        "together_ai/upstage/SOLAR-10.7B-Instruct-v1.0",
        None,
        "Together AI",
        context_length=4096,
        size_billion_parameters=11,
        # selected=True,
    ),
]

# Display names of the models that the post-processing loop below flags with
# `selected = True`. Entries must match `Model.model_display_name` exactly.
# (Presumably these are the models still lacking benchmark measurements -
# the name suggests so, but nothing in this file confirms it.)
MISSING_MEASUREMENTS = [
    "gpt-3.5-turbo",
    "zephyr-7b-beta",
    "Mistral-7B-Instruct-v0.2",
    "llama-2-70b-chat",
    "Mixtral-8x7B-Instruct-v0.1",
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "gemini-pro",
    "gpt-4-turbo",
    "gpt-4",
    "chat-bison (PaLM 2)",
    "chat-bison-32k (PaLM 2 32K)",
    "Vicuna v1.5 (7B)",
    "Mistral (7B) Instruct v0.2 (Together AI)",
    "01-ai Yi Chat (34B)",
    "Chronos Hermes (13B)",
    "LLaMA-2 Chat (7B)",
    "Qwen 1.5 Chat (7B)",
    "Snorkel Mistral PairRM DPO (7B)",
    "OpenHermes-2.5-Mistral (7B)",
    "Falcon Instruct (7B)",
    "RedPajama-INCITE Chat (7B)",
    "WizardLM v1.2 (13B)",
    "Upstage SOLAR Instruct v1 (11B)",
]

# Price tiers as (inclusive upper bound in billions of parameters, price),
# in ascending order. Prices are dollars per 1M tokens (the result is stored
# in `cost_per_million_tokens`). Presumably taken from
# https://www.together.ai/pricing - verify against the current price list.
_TOGETHER_AI_PRICE_TIERS = (
    (4, 0.1),
    (8, 0.2),
    (21, 0.3),
    (41, 0.8),
)
# Price applied to any model larger than the last tier's upper bound.
_TOGETHER_AI_TOP_TIER_PRICE = 0.9


def together_ai_price(billions_of_parameters):
    """Return the per-1M-token price for a model of the given size.

    Tiers are contiguous: a size falls into the first tier whose upper bound
    it does not exceed, so fractional sizes between the tiers (for example
    4.05 or 8.05) get the next tier's price instead of ``None``.

    Args:
        billions_of_parameters: model size in billions of parameters
            (int or float).

    Returns:
        The price in dollars per 1M tokens as a float.
    """
    for upper_bound, price in _TOGETHER_AI_PRICE_TIERS:
        if billions_of_parameters <= upper_bound:
            return price
    return _TOGETHER_AI_TOP_TIER_PRICE

# Post-process the catalogue in place:
#   1. flag every model whose display name is listed in MISSING_MEASUREMENTS;
#   2. for Together AI models whose display name contains "(<N>B)", take the
#      parameter count from that number (overriding any explicit
#      size_billion_parameters) and derive the per-token price from it.
for model in MODELS:
    if model.model_display_name in MISSING_MEASUREMENTS:
        model.selected = True

    if model.provider != "Together AI":
        continue

    # Matches e.g. "(7B)" or "(46.7B)"; group 1 is the number.
    pattern = r"\((\d+(\.\d+)?)B\)"
    match = re.search(pattern, model.model_display_name)
    if not match:
        continue

    number_in_parenthesis = float(match.group(1))
    model.size_billion_parameters = number_in_parenthesis
    model.cost_per_million_tokens = together_ai_price(model.size_billion_parameters)
    # Re-run the dataclass hook so the input/output prices pick up the
    # combined price that was just assigned.
    model.__post_init__()