from dataclasses import dataclass, make_dataclass

import pandas as pd
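
# Collect the non-dunder class attributes of raw_class; for the column classes built
# below, these are the default ColumnContent instances.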
def fields(raw_class):
    return [v for k, v in raw_class.__dict__.items() if k[:2] != "__" and k[-2:] != "__"]
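
# Per-column display metadata: header text, column type (e.g. "markdown"), and visibility flags.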
@dataclass
class ColumnContent:
    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
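
# Column spec for the main results table (AutoEvalColumn). Each entry is a
# (field name, type, default ColumnContent) triple in the form make_dataclass accepts.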
auto_eval_column_dict = []

auto_eval_column_dict.append(
    ["model", ColumnContent, ColumnContent("Model Name", "markdown", True, never_hidden=True)]
)
auto_eval_column_dict.append(["model_provider", ColumnContent, ColumnContent("LLM Provider", "markdown", True)])
auto_eval_column_dict.append(
    ["use_case_name", ColumnContent, ColumnContent("Use Case Name", "markdown", True, never_hidden=True)]
)
auto_eval_column_dict.append(["use_case_type", ColumnContent, ColumnContent("Use Case Type", "markdown", False)])
auto_eval_column_dict.append(
    ["accuracy_method", ColumnContent, ColumnContent("Accuracy Method", "markdown", False, never_hidden=True)]
)
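
# Accuracy columns: the overall average plus the individual accuracy metrics.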
auto_eval_column_dict.append(["accuracy_metric_average", ColumnContent, ColumnContent("Accuracy", "markdown", True)]) |
|
auto_eval_column_dict.append( |
|
[ |
|
"accuracy_metric_instruction_following", |
|
ColumnContent, |
|
ColumnContent("Instruction Following", "markdown", True), |
|
] |
|
) |
|
auto_eval_column_dict.append( |
|
["accuracy_metric_completeness", ColumnContent, ColumnContent("Completeness", "markdown", True)] |
|
) |
|
auto_eval_column_dict.append( |
|
["accuracy_metric_conciseness", ColumnContent, ColumnContent("Conciseness", "markdown", True)] |
|
) |
|
auto_eval_column_dict.append( |
|
["accuracy_metric_factuality", ColumnContent, ColumnContent("Factuality", "markdown", True)] |
|
) |
|
|
|
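
# Cost and speed columns.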
auto_eval_column_dict.append(["latency", ColumnContent, ColumnContent("Response Time (Sec)", "markdown", True)]) |
|
auto_eval_column_dict.append( |
|
["mean_output_tokens", ColumnContent, ColumnContent("Mean Output Tokens", "markdown", True)] |
|
) |
|
auto_eval_column_dict.append(["cost_band", ColumnContent, ColumnContent("Cost Band", "markdown", True)]) |
|
|
|
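
# Trust & Safety columns.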
auto_eval_column_dict.append(["ts", ColumnContent, ColumnContent("Trust & Safety", "markdown", True)]) |
|
auto_eval_column_dict.append(["safety", ColumnContent, ColumnContent("Safety", "markdown", False)]) |
|
auto_eval_column_dict.append(["privacy", ColumnContent, ColumnContent("Privacy", "markdown", False)]) |
|
auto_eval_column_dict.append(["truthfulness", ColumnContent, ColumnContent("Truthfulness", "markdown", False)]) |
|
auto_eval_column_dict.append(["crm_fairness", ColumnContent, ColumnContent("CRM Fairness", "markdown", False)]) |
|
|
|
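
# Build the frozen AutoEvalColumn dataclass; each attribute's default is its ColumnContent spec.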
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
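
# Column spec for the cost and speed leaderboard.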
cost_eval_column_dict = []

cost_eval_column_dict.append(
    ["model", ColumnContent, ColumnContent("Model Name", "markdown", True, never_hidden=True)]
)
cost_eval_column_dict.append(["model_provider", ColumnContent, ColumnContent("LLM Provider", "markdown", True)])
cost_eval_column_dict.append(
    ["use_case_flavor", ColumnContent, ColumnContent("Cost and Speed: Flavor", "markdown", True)]
)
cost_eval_column_dict.append(["latency", ColumnContent, ColumnContent("Response Time (Sec)", "markdown", True)])
cost_eval_column_dict.append(
    ["mean_output_tokens", ColumnContent, ColumnContent("Mean Output Tokens", "markdown", True)]
)
cost_eval_column_dict.append(["cost_band", ColumnContent, ColumnContent("Cost Band", "markdown", True)])

CostEvalColumn = make_dataclass("CostEvalColumn", cost_eval_column_dict, frozen=True)
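
# Column spec for the trust & safety leaderboard.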
ts_eval_column_dict = []

ts_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model Name", "markdown", True, never_hidden=True)])

ts_eval_column_dict.append(["ts", ColumnContent, ColumnContent("Trust & Safety", "markdown", True)])
ts_eval_column_dict.append(["safety", ColumnContent, ColumnContent("Safety", "markdown", False)])
ts_eval_column_dict.append(["privacy", ColumnContent, ColumnContent("Privacy", "markdown", False)])
ts_eval_column_dict.append(["truthfulness", ColumnContent, ColumnContent("Truthfulness", "markdown", False)])
ts_eval_column_dict.append(["crm_fairness", ColumnContent, ColumnContent("CRM Fairness", "markdown", False)])

TSEvalColumn = make_dataclass("TSEvalColumn", ts_eval_column_dict, frozen=True)
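
# Flat lists of column headers and types for each table; the *_LITE variants keep only
# the columns that are displayed by default.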
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]
TYPES = [c.type for c in fields(AutoEvalColumn) if not c.hidden]
COLS_LITE = [c.name for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]
TYPES_LITE = [c.type for c in fields(AutoEvalColumn) if c.displayed_by_default and not c.hidden]

COST_COLS = [c.name for c in fields(CostEvalColumn) if not c.hidden]
COST_TYPES = [c.type for c in fields(CostEvalColumn) if not c.hidden]

TS_COLS = [c.name for c in fields(TSEvalColumn) if not c.hidden]
TS_TYPES = [c.type for c in fields(TSEvalColumn) if not c.hidden]
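
# Approximate model-size buckets used for range filtering; values are presumably billions
# of parameters, with the "?" bucket covering models whose size is unknown.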
NUMERIC_INTERVALS = {
    "?": pd.Interval(-1, 0, closed="right"),
    "~1.5": pd.Interval(0, 2, closed="right"),
    "~3": pd.Interval(2, 4, closed="right"),
    "~7": pd.Interval(4, 9, closed="right"),
    "~13": pd.Interval(9, 20, closed="right"),
    "~35": pd.Interval(20, 45, closed="right"),
    "~60": pd.Interval(45, 70, closed="right"),
    "70+": pd.Interval(70, 10000, closed="right"),
}