refactor the app to use firestore

- .gitignore +2 -1
- main.py +1 -1
- requirements.txt +2 -1
- src/app.py +1 -1
- src/core/config.py +1 -2
- src/services/api.py +0 -38
- src/services/firebase.py +111 -0
.gitignore
CHANGED

@@ -38,4 +38,5 @@ ENV/
 .streamlit/
 
 # Logs
-*.log
+*.log
+pocketpal-ai-6e230-6fe017a6fd65.json
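The newly ignored pocketpal-ai-6e230-6fe017a6fd65.json is the Firebase service-account key that src/services/firebase.py loads from disk. For deployments where shipping the key file is undesirable, credentials.Certificate also accepts a dict of the key-file fields, so the key could be supplied via Streamlit secrets instead. A minimal sketch, assuming a [firebase] table in .streamlit/secrets.toml (the secrets layout and helper name are hypothetical, not part of this commit):

import firebase_admin
from firebase_admin import credentials
import streamlit as st

def initialize_firebase_from_secrets():
    """Variant of initialize_firebase() that avoids a key file on disk."""
    try:
        firebase_admin.get_app()
    except ValueError:
        # credentials.Certificate accepts a dict with the service-account fields
        cred = credentials.Certificate(dict(st.secrets["firebase"]))
        firebase_admin.initialize_app(cred)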
main.py
CHANGED

@@ -7,7 +7,7 @@ from src.components.visualizations import (
     render_performance_plots,
     render_leaderboard_table,
 )
-from src.services.api import fetch_leaderboard_data
+from src.services.firebase import fetch_leaderboard_data
 
 # Configure the page
 st.set_page_config(
requirements.txt
CHANGED

@@ -4,4 +4,5 @@ python-dotenv>=1.0.0
 pandas>=2.1.3
 plotly>=5.18.0
 httpx>=0.25.1
-pydantic-settings>=2.0.3
+pydantic-settings>=2.0.3
+firebase-admin
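Note that firebase-admin is the only unpinned dependency here; adding a floor such as firebase-admin>=6.0.0 (version chosen as an assumption) would match the style of the surrounding lines.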
src/app.py
CHANGED

@@ -7,7 +7,7 @@ async def fetch_and_filter_data(
     benchmark_label: Optional[str] = None
 ) -> pd.DataFrame:
     """Fetch and filter data based on parameters"""
-    from .services.api import fetch_leaderboard_data
+    from .services.firebase import fetch_leaderboard_data
 
     return await fetch_leaderboard_data(
         model_name=model_name,
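fetch_and_filter_data stays async even though the Firestore reads underneath are synchronous, so call sites keep working unchanged. A sketch of how the coroutine is presumably driven from Streamlit's synchronous script (the asyncio.run call is an assumption, not shown in this diff):

import asyncio

# Hypothetical call site inside the Streamlit app
df = asyncio.run(fetch_and_filter_data(model_name="All", benchmark_label="All"))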
src/core/config.py
CHANGED

@@ -2,8 +2,7 @@ from pydantic_settings import BaseSettings
 from functools import lru_cache
 
 class Settings(BaseSettings):
-
-    HF_TOKEN: str
+    FIRESTORE_COLLECTION: str = "benchmarks"
 
     class Config:
         case_sensitive = True
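config.py imports lru_cache, so Settings is presumably exposed through a cached accessor (the deleted api.py imported a module-level settings object). A sketch, assuming a get_settings() helper of this shape exists in the file:

@lru_cache
def get_settings() -> Settings:
    return Settings()

settings = get_settings()
settings.FIRESTORE_COLLECTION  # "benchmarks" unless overridden by an env var

Note that src/services/firebase.py currently hardcodes 'benchmarks' in its collection path rather than reading this setting.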
src/services/api.py
DELETED

@@ -1,38 +0,0 @@
-import httpx
-import pandas as pd
-from typing import Optional, Dict
-import streamlit as st
-from src.core.config import settings
-
-async def fetch_leaderboard_data(
-    model_name: Optional[str] = None,
-    benchmark_label: Optional[str] = None
-) -> pd.DataFrame:
-    """Fetch and process leaderboard data"""
-    params = {}
-    if model_name and model_name != "All":
-        params["model_name"] = model_name
-    if benchmark_label and benchmark_label != "All":
-        params["benchmark_label"] = benchmark_label
-
-    headers = {
-        "Authorization": f"Bearer {settings.HF_TOKEN}",
-        "Accept": "application/json"
-    }
-
-    try:
-        async with httpx.AsyncClient() as client:
-            response = await client.get(
-                f"{settings.API_URL}/api/v1/leaderboard",
-                params=params,
-                headers=headers,
-                follow_redirects=True
-            )
-            response.raise_for_status()
-            data = response.json()
-            return pd.DataFrame(data)
-    except Exception as e:
-        st.error(f"Error fetching data: {str(e)}")
-        if hasattr(e, 'response'):
-            st.error(f"Response: {e.response.text}")
-        return pd.DataFrame()
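The replacement in src/services/firebase.py keeps the same signature (optional model_name and benchmark_label, returning a DataFrame) and the same empty-DataFrame-on-error behavior, so callers only need the import swap seen in main.py and src/app.py.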
src/services/firebase.py
ADDED

@@ -0,0 +1,111 @@
+import firebase_admin
+from firebase_admin import credentials, firestore
+from typing import List, Dict, Optional
+import pandas as pd
+import streamlit as st
+
+def initialize_firebase():
+    """Initialize Firebase with credentials"""
+    try:
+        firebase_admin.get_app()
+    except ValueError:
+        cred = credentials.Certificate('pocketpal-ai-6e230-6fe017a6fd65.json')
+        firebase_admin.initialize_app(cred)
+    return firestore.client()
+
+db = initialize_firebase()
+
+def normalize_device_id(device_info: dict) -> str:
+    """Normalize device identifier for aggregation"""
+    emulator = "/Emulator" if device_info["isEmulator"] else ""
+    if device_info["systemName"].lower() == "ios":
+        return f"iOS/{device_info['model']}{emulator}"
+
+    memory_tier = f"{device_info['totalMemory'] // (1024**3)}GB"
+    return f"{device_info['brand']}/{device_info['model']}/{memory_tier}{emulator}"
+
+def format_params_in_b(params: int) -> float:
+    """Format number of parameters in billions"""
+    b_value = params / 1e9
+    if b_value >= 10:
+        return round(b_value, 1)
+    elif b_value >= 1:
+        return round(b_value, 2)
+    else:
+        return round(b_value, 3)
+
+def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
+    """Format submissions for leaderboard display"""
+    formatted_data = []
+
+    for sub in submissions:
+        try:
+            benchmark_result = sub.get('benchmarkResult', {})
+            device_info = sub.get('deviceInfo', {})
+
+            if not benchmark_result or not device_info:
+                continue
+
+            formatted_data.append({
+                "Device": f"{device_info.get('model', 'Unknown')} [Emulator]" if device_info.get('isEmulator') else device_info.get('model', 'Unknown'),
+                "Platform": device_info.get('systemName', 'Unknown'),
+                "Benchmark": f"{benchmark_result.get('config', {}).get('label', 'Unknown')} (pp: {benchmark_result.get('config', {}).get('pp', 'N/A')}, tg: {benchmark_result.get('config', {}).get('tg', 'N/A')})",
+                "Model": benchmark_result.get('modelName', 'Unknown'),
+                "Model Size": format_params_in_b(benchmark_result.get('modelNParams', 0)),
+                "Prompt Processing": round(benchmark_result.get('ppAvg', 0), 2),
+                "Token Generation": round(benchmark_result.get('tgAvg', 0), 2),
+                "Memory Usage (%)": benchmark_result.get('peakMemoryUsage', {}).get('percentage'),
+                "Memory Usage (GB)": round(benchmark_result.get('peakMemoryUsage', {}).get('used', 0) / (1024**3), 2) if benchmark_result.get('peakMemoryUsage', {}).get('used') else None,
+                "Total Memory (GB)": round(device_info.get('totalMemory', 0) / (1024**3), 2),
+                "CPU Cores": device_info.get('cpuDetails', {}).get('cores', 'Unknown'),
+                "Normalized Device ID": normalize_device_id(device_info),
+                "Timestamp": benchmark_result.get('timestamp', 'Unknown'),
+                "Model ID": benchmark_result.get('modelId', 'Unknown'),
+                "OID": benchmark_result.get('oid'),
+            })
+        except Exception as e:
+            st.warning(f"Error processing submission: {str(e)}")
+            continue
+
+    return pd.DataFrame(formatted_data)
+
+async def fetch_leaderboard_data(
+    model_name: Optional[str] = None,
+    benchmark_label: Optional[str] = None
+) -> pd.DataFrame:
+    """Fetch and process leaderboard data from Firestore"""
+    try:
+        # Navigate to the correct collection path: benchmarks/v1/submissions
+        submissions_ref = db.collection('benchmarks').document('v1').collection('submissions')
+
+        # Get all documents
+        docs = submissions_ref.stream()
+        all_docs = list(docs)
+
+        if len(all_docs) == 0:
+            return pd.DataFrame()
+
+        # Process documents and filter in memory
+        submissions = []
+
+        for doc in all_docs:
+            data = doc.to_dict()
+
+            if not data or 'benchmarkResult' not in data:
+                continue
+
+            benchmark_result = data['benchmarkResult']
+
+            # Apply filters
+            if model_name and model_name != "All" and benchmark_result.get('modelName') != model_name:
+                continue
+            if benchmark_label and benchmark_label != "All" and benchmark_result.get('config', {}).get('label') != benchmark_label:
+                continue
+
+            submissions.append(data)
+
+        return format_leaderboard_data(submissions)
+
+    except Exception as e:
+        st.error(f"Error fetching data from Firestore: {str(e)}")
+        return pd.DataFrame()
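fetch_leaderboard_data streams every submission and applies both filters in memory, which keeps the code simple and avoids Firestore index management. If the collection grows, the equality filters could be pushed into the query instead; a sketch of that variant, assuming google-cloud-firestore >= 2.11 for FieldFilter (this helper is not part of the commit, and combining two equality filters may require a composite index):

from google.cloud.firestore_v1.base_query import FieldFilter

def build_submissions_query(model_name=None, benchmark_label=None):
    """Server-side variant of the in-memory filtering above (hypothetical)."""
    query = db.collection('benchmarks').document('v1').collection('submissions')
    if model_name and model_name != "All":
        # Dotted paths filter on fields nested inside the benchmarkResult map
        query = query.where(filter=FieldFilter('benchmarkResult.modelName', '==', model_name))
    if benchmark_label and benchmark_label != "All":
        query = query.where(filter=FieldFilter('benchmarkResult.config.label', '==', benchmark_label))
    return query  # query.stream() then yields only matching documents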