# Spaces: Running on Zero
import html
import os
import re
import time
from threading import Thread

import gradio as gr
import requests
import spaces
from huggingface_hub import HfApi, list_models
# Hugging Face access token read from the environment; None when HF_TOKEN is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

# HTML banner rendered at the top of the page.
TITLE = """
<h1><center>Open-Schizo-Leaderboard</center></h1>
<center>
<p>Comparing LLM Cards for how absolutely Schizo they are</p>
<p>If you like my work please subscribe for $5 a month to <a href="https://www.patreon.com/Rombodawg" target="_blank">https://www.patreon.com/Rombodawg</a></p>
</center>
"""

# Stylesheet passed to gr.Blocks: duplicate button, leaderboard table and
# the "loading" placeholder.
CSS = """
.duplicate-button {
margin: auto !important;
color: white !important;
background: black !important;
border-radius: 100vh !important;
}
h3 {
text-align: center;
}
table {
width: 100%;
border-collapse: collapse;
}
table, th, td {
border: 1px solid #ddd;
}
th, td {
padding: 8px;
text-align: left;
}
th {
background-color: #f2f2f2;
cursor: pointer;
}
tr:nth-child(even) {
background-color: #f9f9f9;
}
tr:hover {
background-color: #f1f1f1;
}
.leaderboard-container {
max-height: 600px;
overflow-y: auto;
}
.loading {
text-align: center;
font-size: 18px;
padding: 20px;
}
"""
# Trigger words/phrases searched for in every model card. They are matched
# case-insensitively and as plain substrings (see count_schizo_words).
SCHIZO_WORDS = [
    # Hype vocabulary
    "MAXED", "Max", "SUPER", "Duped", "Edge", "maid", "Solution",
    # Names of other (mostly proprietary) models
    "gpt-4", "gpt4o", "claude-3.5", "claude-3.7", "o1", "o3-mini",
    "gpt-4.5", "chatgpt",
    # Merge talk, superlatives and internet slang
    "merge", "merged", "best", "greatest", "highest quality", "Class 1",
    "NSFW", "4chan", "reddit", "vibe", "vibe check", "vibe checking",
    "dirty", "meme", "memes", "upvote",
    # Presumably names of model-merging methods.
    # NOTE(review): "DELL" and "DELLA Task Arithmeti" look truncated — confirm
    # the intended spellings before changing them (they affect every score).
    "Linear", "SLERP", "Nearswap", "Task Arithmetic", "Task_Arithmetic",
    "TIES", "DARE", "Passthrough", "Model Breadcrumbs", "Model Stock",
    "NuSLERP", "DELL", "DELLA Task Arithmeti", "SCE",
]
# Characters treated as markdown/markup noise when scoring a card's "visual"
# rating. Each symbol appears exactly once: a repeated entry would make
# count_markdown_symbols count that character twice (the list previously
# contained ">" two times).
MARKDOWN_SYMBOLS = [
    "#", "*", "_", "`", ">", "-", "+", "[", "]", "(", ")", "!", "\\", "|",
    "~", "<", "=", ":",
]
def count_schizo_words(text, words=None):
    """Count case-insensitive occurrences of schizo trigger words in ``text``.

    Every entry is matched literally (regex metacharacters are escaped) and
    as a plain substring, so "Max" also matches inside "maximum", and
    overlapping entries such as "merge"/"merged" are each counted.

    Args:
        text: The text to scan.
        words: Optional iterable of words/phrases to search for. When
            omitted, the module-level ``SCHIZO_WORDS`` list is used.

    Returns:
        The number of matches summed over all words.
    """
    if words is None:
        words = SCHIZO_WORDS
    return sum(
        len(re.findall(re.escape(word), text, re.IGNORECASE))
        for word in words
    )
def count_markdown_symbols(text, symbols=None):
    """Count how many markdown symbol characters occur in ``text``.

    Args:
        text: The text to scan.
        symbols: Optional iterable of symbol strings to count. When omitted,
            the module-level ``MARKDOWN_SYMBOLS`` list is used.

    Returns:
        The total number of occurrences of all distinct symbols.
    """
    if symbols is None:
        symbols = MARKDOWN_SYMBOLS
    # dict.fromkeys drops repeated entries (keeping order) so that a symbol
    # listed twice is not counted twice.
    return sum(text.count(symbol) for symbol in dict.fromkeys(symbols))
def calculate_word_count(text):
    """Return the number of word tokens in ``text``.

    A token is a maximal run of regex word characters (letters, digits and
    underscore), so punctuation such as an apostrophe splits a word in two.
    """
    return sum(1 for _ in re.finditer(r'\w+', text))
def calculate_schizo_rating(readme_content):
    """Score a model card and return the rating broken down by component.

    The base ("word") rating is 10 points per schizo-word match. Two penalty
    ratings are expressed as multiples of that base rating:

    * wordiness: x0.5 for cards under 150 words; for long cards x0.5 above
      1000 words, x0.75 above 1500 and x1.0 above 2000, plus x0.25 for each
      further full 500 words beyond 2000.
    * visual: x0.25 above 100 markdown symbols and x0.5 above 150, plus
      x0.25 for each further full 50 symbols beyond 150.

    Args:
        readme_content: The README text of the model card.

    Returns:
        A dict with the keys ``combined`` (sum of the three ratings),
        ``word``, ``wordiness``, ``visual``, ``schizo_word_count``,
        ``word_count`` and ``markdown_count``.
    """
    hits = count_schizo_words(readme_content)
    n_words = calculate_word_count(readme_content)
    n_symbols = count_markdown_symbols(readme_content)

    base_rating = hits * 10

    # Wordiness multiplier: very short and very long cards are both penalised;
    # cards between 150 and 1000 words get no penalty.
    if n_words < 150:
        wordiness_factor = 0.5
    elif n_words > 2000:
        wordiness_factor = 1.0 + ((n_words - 2000) // 500) * 0.25
    elif n_words > 1500:
        wordiness_factor = 0.75
    elif n_words > 1000:
        wordiness_factor = 0.5
    else:
        wordiness_factor = 0

    # Visual multiplier: grows with the amount of markdown symbols.
    if n_symbols > 150:
        visual_factor = 0.5 + ((n_symbols - 150) // 50) * 0.25
    elif n_symbols > 100:
        visual_factor = 0.25
    else:
        visual_factor = 0

    wordiness_rating = base_rating * wordiness_factor
    visual_rating = base_rating * visual_factor

    return {
        "combined": base_rating + wordiness_rating + visual_rating,
        "word": base_rating,
        "wordiness": wordiness_rating,
        "visual": visual_rating,
        "schizo_word_count": hits,
        "word_count": n_words,
        "markdown_count": n_symbols
    }
def fetch_model_readme(model_id, timeout=10):
    """Download the raw README.md of a Hugging Face model repository.

    Args:
        model_id: Repository id, interpolated into the
            ``https://huggingface.co/{model_id}/raw/main/README.md`` URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the whole
            leaderboard run indefinitely.

    Returns:
        The README text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status, timeout or any other request failure).
    """
    url = f"https://huggingface.co/{model_id}/raw/main/README.md"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as e:
        # Best effort: a failed download just means this model is skipped.
        print(f"Error fetching README for {model_id}: {e}")
        return None
    if response.status_code != 200:
        return None
    return response.text
def generate_leaderboard_data(model_type="llm", max_models=500):
    """Build the leaderboard rows by rating the README of each listed model.

    Args:
        model_type: ``"llm"`` restricts the listing to the
            ``text-generation`` task; any other value lists models without
            a task filter.
        max_models: Maximum number of models requested from the Hub listing.

    Returns:
        A list of dicts (``model_id``, ``combined_rating``, ``word_rating``,
        ``wordiness_rating``, ``visual_rating``, ``schizo_word_count``,
        ``word_count``, ``markdown_count``) sorted by ``combined_rating`` in
        descending order. Models whose README is missing or blank are
        left out.
    """
    # List through the token-carrying client so HF_TOKEN is actually used;
    # the client was previously created but the listing bypassed it.
    api = HfApi(token=HF_TOKEN)

    listing_kwargs = {"limit": max_models}  # cap the number of models fetched
    if model_type == "llm":
        # NOTE(review): `task` may be deprecated in newer huggingface_hub
        # releases — confirm against the version this Space pins.
        listing_kwargs["task"] = "text-generation"
    models = api.list_models(**listing_kwargs)

    leaderboard_data = []
    for model in models:
        model_id = model.id
        readme_content = fetch_model_readme(model_id)
        if readme_content is None or not readme_content.strip():
            # Skip models without READMEs
            continue

        ratings = calculate_schizo_rating(readme_content)
        leaderboard_data.append({
            "model_id": model_id,
            "combined_rating": ratings["combined"],
            "word_rating": ratings["word"],
            "wordiness_rating": ratings["wordiness"],
            "visual_rating": ratings["visual"],
            "schizo_word_count": ratings["schizo_word_count"],
            "word_count": ratings["word_count"],
            "markdown_count": ratings["markdown_count"],
        })

        # Status update every 10 rated models
        processed_count = len(leaderboard_data)
        if processed_count % 10 == 0:
            print(f"Processed {processed_count} models")

    # Sort by combined rating in descending order
    leaderboard_data.sort(key=lambda x: x["combined_rating"], reverse=True)
    return leaderboard_data
def create_leaderboard_html(leaderboard_data):
    """Render leaderboard rows as an HTML table with click-to-sort headers.

    Args:
        leaderboard_data: Iterable of dicts providing ``model_id``,
            ``combined_rating``, ``visual_rating``, ``wordiness_rating`` and
            ``word_rating``. Ratings are formatted with two decimals.

    Returns:
        A string containing the table markup followed by the ``sortTable``
        JavaScript helper. An empty input yields a table with only the
        header row.
    """
    # NOTE(review): the "Average Schizo Rating" column shows combined_rating
    # (a sum) and "Overall Schizo Rating" shows word_rating — confirm the
    # labels are the intended ones.
    header = """
    <div class="leaderboard-container">
    <table id="leaderboard">
    <tr>
    <th onclick="sortTable(0)">Model</th>
    <th onclick="sortTable(1, true)">Average Schizo Rating</th>
    <th onclick="sortTable(2, true)">Visual Schizo Rating</th>
    <th onclick="sortTable(3, true)">Wordiness Schizo Rating</th>
    <th onclick="sortTable(4, true)">Overall Schizo Rating</th>
    </tr>
    """

    rows = []
    for item in leaderboard_data:
        # The model id comes from an external API, so escape it before
        # placing it into markup.
        model_cell = html.escape(str(item["model_id"]))
        combined = item["combined_rating"]
        visual = item["visual_rating"]
        wordiness = item["wordiness_rating"]
        word = item["word_rating"]
        rows.append(f"""
    <tr>
    <td>{model_cell}</td>
    <td>{combined:.2f}</td>
    <td>{visual:.2f}</td>
    <td>{wordiness:.2f}</td>
    <td>{word:.2f}</td>
    </tr>
    """)

    # NOTE(review): browsers typically do not execute <script> tags inserted
    # via innerHTML — confirm sortTable is actually available when this
    # markup is shown through gr.HTML.
    footer = """
    </table>
    </div>
    <script>
    function sortTable(n, isNumeric = false) {
        var table, rows, switching, i, x, y, shouldSwitch, dir, switchcount = 0;
        table = document.getElementById("leaderboard");
        switching = true;
        dir = "asc";
        while (switching) {
            switching = false;
            rows = table.rows;
            for (i = 1; i < (rows.length - 1); i++) {
                shouldSwitch = false;
                x = rows[i].getElementsByTagName("TD")[n];
                y = rows[i + 1].getElementsByTagName("TD")[n];
                if (dir == "asc") {
                    if (isNumeric) {
                        if (parseFloat(x.innerHTML) > parseFloat(y.innerHTML)) {
                            shouldSwitch = true;
                            break;
                        }
                    } else {
                        if (x.innerHTML.toLowerCase() > y.innerHTML.toLowerCase()) {
                            shouldSwitch = true;
                            break;
                        }
                    }
                } else if (dir == "desc") {
                    if (isNumeric) {
                        if (parseFloat(x.innerHTML) < parseFloat(y.innerHTML)) {
                            shouldSwitch = true;
                            break;
                        }
                    } else {
                        if (x.innerHTML.toLowerCase() < y.innerHTML.toLowerCase()) {
                            shouldSwitch = true;
                            break;
                        }
                    }
                }
            }
            if (shouldSwitch) {
                rows[i].parentNode.insertBefore(rows[i + 1], rows[i]);
                switching = true;
                switchcount++;
            } else {
                if (switchcount == 0 && dir == "asc") {
                    dir = "desc";
                    switching = true;
                }
            }
        }
    }
    </script>
    """

    # Join once instead of growing the page with repeated "+=".
    return header + "".join(rows) + footer
def load_leaderboard(model_type):
    """Stream the leaderboard markup for ``model_type``.

    This is a generator: it first yields a "loading" placeholder, then
    yields either the finished leaderboard HTML or, if building it raised,
    an error message wrapped in the same "loading" container.
    """
    # Placeholder shown while the models are fetched and rated
    yield '<div class="loading">Loading models and analyzing Schizo ratings... This may take a few minutes.</div>'
    try:
        rows = generate_leaderboard_data(model_type)
        yield create_leaderboard_html(rows)
    except Exception as exc:
        yield f'<div class="loading">Error generating leaderboard: {exc!s}</div>'
# Non-streaming loader: builds the whole leaderboard in a single call
def background_loader(model_type, progress=None):
    """Return the leaderboard HTML for ``model_type``, or an error message.

    ``progress`` is accepted but not used. Any exception raised while
    generating or rendering the data is turned into an error ``<div>``
    instead of propagating.
    """
    try:
        rows = generate_leaderboard_data(model_type)
        return create_leaderboard_html(rows)
    except Exception as exc:
        return f'<div class="loading">Error generating leaderboard: {exc!s}</div>'
def init_leaderboard():
    """Return the placeholder markup shown before any leaderboard is loaded."""
    message = "Initializing leaderboard... Please wait while we analyze Hugging Face models."
    return f'<div class="loading">{message}</div>'
# Assemble the Gradio UI: banner, duplicate button, model-type filter with a
# refresh button, and the HTML area that displays the leaderboard.
with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(
        value="Duplicate Space for private use",
        elem_classes="duplicate-button",
    )

    with gr.Row(), gr.Column():
        model_type_dropdown = gr.Dropdown(
            choices=["llm", "all"],
            value="llm",
            label="Model Type Filter",
        )
        refresh_button = gr.Button("Refresh Leaderboard")

    # Starts out with the "initializing" placeholder from init_leaderboard
    leaderboard_html = gr.HTML(value=init_leaderboard)

    # The initial page load and the refresh button share the same handler
    # and the same input/output components.
    _leaderboard_event = {
        "fn": load_leaderboard,
        "inputs": [model_type_dropdown],
        "outputs": [leaderboard_html],
    }
    demo.load(**_leaderboard_event)
    refresh_button.click(**_leaderboard_event)

if __name__ == "__main__":
    demo.launch()