import streamlit as st
import pandas as pd

st.set_page_config(page_title="LLM API Budget Dashboard", layout="wide")

# Title and description
st.title("LLM API Budget Dashboard")

# Define LLM models and their costs
llm_data = {
    "GPT-4o": {"input_cost_per_m": 2.50, "output_cost_per_m": 10.00},
    "Claude 3.7 Sonnet": {"input_cost_per_m": 3.00, "output_cost_per_m": 15.00},
    "Gemini Flash 1.5-8b": {"input_cost_per_m": 0.038, "output_cost_per_m": 0.15},
    "o3-mini": {"input_cost_per_m": 1.10, "output_cost_per_m": 4.40},
}

# Convert the LLM data to a DataFrame for displaying in a table
llm_df = pd.DataFrame([
    {
        "Model": model,
        "Input Cost ($/M tokens)": data["input_cost_per_m"],
        "Output Cost ($/M tokens)": data["output_cost_per_m"],
    }
    for model, data in llm_data.items()
])

# Display LLM cost info
st.subheader("LLM Cost Information")
st.dataframe(llm_df, use_container_width=True)

# Create columns for main layout
left_col, right_col = st.columns([1, 3])

with left_col:
    st.header("Configuration")

    # Token input section
    st.subheader("Token Settings")
    input_tokens = st.number_input("Input Tokens", min_value=1, value=400, step=100)
    output_tokens = st.number_input("Output Tokens", min_value=1, value=200, step=100)

    # LLM selection
    st.subheader("Select LLMs")
    selected_llms = st.multiselect(
        "Choose LLMs",
        options=list(llm_data.keys()),
        default=list(llm_data.keys()),
    )

    # Run count settings
    st.subheader("Run Count Settings")
    uniform_runs = st.checkbox("Run all LLMs the same number of times", value=True)

    if uniform_runs:
        uniform_run_count = st.number_input(
            "Number of runs for all LLMs", min_value=1, value=400000, step=1
        )
        run_counts = {llm: uniform_run_count for llm in selected_llms}
    else:
        st.write("Set individual run counts for each LLM:")
        run_counts = {}
        for llm in selected_llms:
            run_counts[llm] = st.number_input(
                f"Runs for {llm}", min_value=1, value=1, step=1
            )

    # Stability test settings
    st.subheader("Stability Test Settings")
    stability_test = st.checkbox("Enable stability testing", value=False)

    # Global settings for stability testing
    stability_iterations = {}
    stability_data_percentages = {}

    if stability_test:
        st.write("Global Stability Settings:")
        use_subset = st.checkbox("Test stability on a subset of data", value=False)

        if use_subset:
            default_percent = st.slider(
                "Default data percentage for stability tests",
                min_value=10,
                max_value=100,
                value=50,
                step=5,
                help="Percentage of the input data to use for stability testing",
            )

        st.write("Set stability iterations for selected LLMs:")
        for llm in selected_llms:
            st.markdown(f"**{llm} Stability Settings**")
            col1, col2 = st.columns(2)

            with col1:
                stability_enabled = st.checkbox(
                    "Test stability", value=False, key=f"stability_{llm}"
                )

            if stability_enabled:
                with col1:
                    iterations = st.number_input(
                        "Iterations",
                        min_value=2,
                        value=10,
                        step=1,
                        key=f"iterations_{llm}",
                    )
                    stability_iterations[llm] = iterations

                with col2:
                    if use_subset:
                        custom_percent = st.number_input(
                            "Data %",
                            min_value=5,
                            max_value=100,
                            value=default_percent,
                            step=5,
                            key=f"percent_{llm}",
                            help="Percentage of the input data to use",
                        )
                        stability_data_percentages[llm] = custom_percent / 100.0
                    else:
                        stability_data_percentages[llm] = 1.0

            if llm != selected_llms[-1]:
                st.markdown("---")

with right_col:
    # Calculate costs
    st.header("Cost Results")

    if not selected_llms:
        st.warning("Please select at least one LLM model.")
    else:
        results = []
        for llm in selected_llms:
            base_runs = run_counts[llm]
            stability_runs = stability_iterations.get(llm, 0)
            data_percentage = stability_data_percentages.get(llm, 1.0)

            # Calculate total runs
            if stability_runs == 0:
                total_runs = base_runs
                effective_data_percentage = 1.0  # No stability testing, use full data
            else:
                total_runs = base_runs * stability_runs
                effective_data_percentage = data_percentage  # Use configured percentage for stability testing

            # Calculate tokens based on data percentage
            effective_input_tokens = input_tokens * effective_data_percentage
            effective_output_tokens = output_tokens * effective_data_percentage

            total_input_tokens = effective_input_tokens * total_runs
            total_output_tokens = effective_output_tokens * total_runs

            input_cost = (total_input_tokens / 1_000_000) * llm_data[llm]["input_cost_per_m"]
            output_cost = (total_output_tokens / 1_000_000) * llm_data[llm]["output_cost_per_m"]
            total_cost = input_cost + output_cost

            results.append({
                "Model": llm,
                "Base Runs": base_runs,
                "Stability Test Iterations": stability_iterations.get(llm, 0),
                "Data Percentage": f"{data_percentage * 100:.0f}%" if stability_runs > 0 else "100%",
                "Effective Input Tokens": int(effective_input_tokens),
                "Effective Output Tokens": int(effective_output_tokens),
                "Total Runs": total_runs,
                "Total Input Tokens": int(total_input_tokens),
                "Total Output Tokens": int(total_output_tokens),
                "Input Cost ($)": input_cost,
                "Output Cost ($)": output_cost,
                "Total Cost ($)": total_cost,
            })

        # Create DataFrame from results
        results_df = pd.DataFrame(results)

        # Display results
        st.subheader("Cost Breakdown")
        st.dataframe(results_df, use_container_width=True)

        # Calculate overall totals
        total_input_cost = results_df["Input Cost ($)"].sum()
        total_output_cost = results_df["Output Cost ($)"].sum()
        total_cost = results_df["Total Cost ($)"].sum()

        # Display totals
        col1, col2, col3 = st.columns(3)
        col1.metric("Total Input Cost", f"${total_input_cost:.2f}")
        col2.metric("Total Output Cost", f"${total_output_cost:.2f}")
        col3.metric("Total API Cost", f"${total_cost:.2f}")

        # Export options
        st.subheader("Export Options")
        csv = results_df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Results as CSV",
            data=csv,
            file_name='llm_budget_results.csv',
            mime='text/csv',
        )
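
# A minimal way to launch this dashboard locally, assuming Streamlit and pandas
# are installed; the script filename below is an assumption, substitute whatever
# name this file is saved under:
#
#   pip install streamlit pandas
#   streamlit run llm_budget_dashboard.py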