import streamlit as st
import streamlit.components.v1 as components
import requests
import base64
import os
import asyncio
import zipfile
import tempfile
import shutil
import plotly.express as px
from bs4 import BeautifulSoup
from huggingface_hub import HfApi, snapshot_download

# Hugging Face Hub API client, used to list each user's models and datasets.
api = HfApi()

# Local working directories for downloaded profile pages, ZIP archives, and repo snapshots.
HTML_DIR = "generated_html_pages"
ZIP_DIR = "generated_zips"
SNAPSHOT_DIR = "snapshot_downloads"

for directory in [HTML_DIR, ZIP_DIR, SNAPSHOT_DIR]:
    os.makedirs(directory, exist_ok=True)

default_users = {
    "users": [
        "awacke1", "rogerxavier", "jonatasgrosman", "kenshinn", "Csplk", "DavidVivancos",
        "cdminix", "Jaward", "TuringsSolutions", "Severian", "Wauplin",
        "phosseini", "Malikeh1375", "gokaygokay", "MoritzLaurer", "mrm8488",
        "TheBloke", "lhoestq", "xw-eric", "Paul", "Muennighoff",
        "ccdv", "haonan-li", "chansung", "lukaemon", "hails",
        "pharmapsychotic", "KingNish", "merve", "ameerazam08", "ashleykleynhans"
    ]
}


async def fetch_user_content(username):
    """Collect a user's models and datasets, running the blocking Hub calls in worker threads."""
    try:
        # list_models/list_datasets paginate lazily, so consume them inside the worker thread.
        models = await asyncio.to_thread(lambda: list(api.list_models(author=username)))
        datasets = await asyncio.to_thread(lambda: list(api.list_datasets(author=username)))
        return {
            "username": username,
            "models": models,
            "datasets": datasets
        }
    except Exception as e:
        return {"username": username, "error": str(e)}


def download_user_page(username):
    """Fetch a user's public profile page and save it locally; returns (path, html, error)."""
    url = f"https://huggingface.co/{username}"
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        html_content = response.text
        html_file_path = os.path.join(HTML_DIR, f"{username}.html")
        with open(html_file_path, "w", encoding='utf-8') as html_file:
            html_file.write(html_content)
        return html_file_path, html_content, None
    except Exception as e:
        return None, None, str(e)


@st.cache_resource
def create_zip_of_files(files, zip_name):
    zip_file_path = os.path.join(ZIP_DIR, zip_name)
    with zipfile.ZipFile(zip_file_path, 'w') as zipf:
        for file in files:
            zipf.write(file, arcname=os.path.basename(file))
    return zip_file_path


@st.cache_resource
def get_download_link(file_path, link_text):
    # Embed the file as a base64 data URI so it can be served as a plain HTML download link.
    with open(file_path, 'rb') as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()
    return f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'


async def fetch_all_users(usernames):
    tasks = [fetch_user_content(username) for username in usernames]
    return await asyncio.gather(*tasks)


def perform_snapshot_download(repo_id, repo_type):
    """Download a full repo snapshot into a temp dir, zip it, and return (zip_path, error_message)."""
    try:
        temp_dir = tempfile.mkdtemp()
        snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=temp_dir)
        zip_name = f"{repo_id.replace('/', '_')}_{repo_type}.zip"
        zip_path = os.path.join(SNAPSHOT_DIR, zip_name)
        shutil.make_archive(zip_path[:-4], 'zip', temp_dir)
        shutil.rmtree(temp_dir)
        return zip_path, None
    except Exception as e:
        return None, str(e)


def display_html_grid(html_files):
    """Render the downloaded profile pages in a three-column grid."""
    num_columns = 3
    for i in range(0, len(html_files), num_columns):
        cols = st.columns(num_columns)
        for j in range(num_columns):
            if i + j < len(html_files):
                with cols[j]:
                    with open(html_files[i + j], 'r', encoding='utf-8') as file:
                        html_content = file.read()
                    soup = BeautifulSoup(html_content, 'html.parser')
                    st.subheader(f"Page: {os.path.basename(html_files[i + j])}")
                    components.html(str(soup.body), height=300, scrolling=True)


def display_images_from_html(html_file):
    with open(html_file, 'r', encoding='utf-8') as file:
        html_content = file.read()
    soup = BeautifulSoup(html_content, 'html.parser')
    images = soup.find_all('img')
    for img in images:
        src = img.get('src')
        if src and src.startswith('http'):
            st.image(src, use_container_width=True)


def display_videos_from_html(html_file):
    with open(html_file, 'r', encoding='utf-8') as file:
        html_content = file.read()
    soup = BeautifulSoup(html_content, 'html.parser')
    videos = soup.find_all('video')
    for video in videos:
        # A <video> tag may carry its URL directly or via a nested <source> tag.
        source = video.find('source')
        src = source.get('src') if source else video.get('src')
        if src and src.startswith('http'):
            st.video(src)


def main():
    st.title("🧑‍💼People🧠Models📚Datasets")

    user_input = st.text_area(
        "Enter Hugging Face usernames (one per line):",
        value="\n".join(default_users["users"]),
        height=300
    )

    if st.button("Show User Content and Download Snapshots"):
        if user_input:
            username_list = [username.strip() for username in user_input.split('\n') if username.strip()]

            user_data_list = asyncio.run(fetch_all_users(username_list))

            stats = {"username": [], "models_count": [], "datasets_count": []}
            successful_html_files = []
            snapshot_downloads = []

            st.markdown("### User Content Overview")
            for user_data in user_data_list:
                username = user_data["username"]
                with st.container():
                    st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")

                    if "error" in user_data:
                        st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
                    else:
                        models = user_data["models"]
                        datasets = user_data["datasets"]

                        html_file_path, html_content, download_error = download_user_page(username)
                        if html_file_path and html_content:
                            successful_html_files.append(html_file_path)
                            st.success(f"✅ Successfully downloaded {username}'s page.")

                            with st.expander(f"View {username}'s HTML page"):
                                st.markdown(html_content, unsafe_allow_html=True)
                        else:
                            st.error(f"❌ Failed to download {username}'s page: {download_error}")

                        stats["username"].append(username)
                        stats["models_count"].append(len(models))
                        stats["datasets_count"].append(len(datasets))

                        with st.expander(f"🧠 Models ({len(models)})", expanded=False):
                            if models:
                                for model in models:
                                    model_name = model.modelId.split("/")[-1]
                                    st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
                                    if st.button(f"Download Snapshot: {model_name}", key=f"model_{model.modelId}"):
                                        with st.spinner(f"Downloading snapshot for {model_name}..."):
                                            zip_path, error = perform_snapshot_download(model.modelId, "model")
                                            if error:
                                                st.error(f"Failed to download {model_name}: {error}")
                                            else:
                                                snapshot_downloads.append(zip_path)
                                                st.success(f"Successfully downloaded snapshot for {model_name}")
                            else:
                                st.markdown("No models found. 🤷‍♂️")

                        with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
                            if datasets:
                                for dataset in datasets:
                                    dataset_name = dataset.id.split("/")[-1]
                                    st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
                                    if st.button(f"Download Snapshot: {dataset_name}", key=f"dataset_{dataset.id}"):
                                        with st.spinner(f"Downloading snapshot for {dataset_name}..."):
                                            zip_path, error = perform_snapshot_download(dataset.id, "dataset")
                                            if error:
                                                st.error(f"Failed to download {dataset_name}: {error}")
                                            else:
                                                snapshot_downloads.append(zip_path)
                                                st.success(f"Successfully downloaded snapshot for {dataset_name}")
                            else:
                                st.markdown("No datasets found. 🤷‍♀️")

                    st.markdown("---")

            if successful_html_files:
                st.markdown("### HTML Grid View")
                display_html_grid(successful_html_files)

                st.markdown("### Image Gallery")
                for html_file in successful_html_files:
                    display_images_from_html(html_file)

                st.markdown("### Video Gallery")
                for html_file in successful_html_files:
                    display_videos_from_html(html_file)

                html_zip_path = create_zip_of_files(successful_html_files, "HuggingFace_User_Pages.zip")
                html_download_link = get_download_link(html_zip_path, "📥 Download All HTML Pages as ZIP")
                st.markdown(html_download_link, unsafe_allow_html=True)
            else:
                st.warning("No HTML files were successfully downloaded to create a ZIP archive.")

            if snapshot_downloads:
                snapshot_zip_path = create_zip_of_files(snapshot_downloads, "HuggingFace_Snapshots.zip")
                snapshot_download_link = get_download_link(snapshot_zip_path, "📥 Download All Snapshots as ZIP")
                st.markdown(snapshot_download_link, unsafe_allow_html=True)

            if stats["username"]:
                st.markdown("### User Content Statistics")

                fig_models = px.bar(
                    x=stats["username"],
                    y=stats["models_count"],
                    labels={'x': 'Username', 'y': 'Number of Models'},
                    title="Number of Models per User"
                )
                st.plotly_chart(fig_models)

                fig_datasets = px.bar(
                    x=stats["username"],
                    y=stats["datasets_count"],
                    labels={'x': 'Username', 'y': 'Number of Datasets'},
                    title="Number of Datasets per User"
                )
                st.plotly_chart(fig_datasets)

        else:
            st.warning("Please enter at least one username. Don't be shy! 😅")

    st.sidebar.markdown("""
## How to use:
1. The text area is pre-filled with a list of Hugging Face usernames. Edit the list or add more usernames, one per line.
2. Click **'Show User Content and Download Snapshots'**.
3. View each user's models and datasets, along with a link to their Hugging Face profile.
4. Click **'Download Snapshot'** next to any model or dataset to fetch a full snapshot of that repository.
5. **Download ZIP archives** of all the HTML pages and snapshots via the download links.
6. Check out the statistics visualizations below.
7. **New features:**
   - View all downloaded HTML pages in a grid layout
   - Browse image and video galleries extracted from the HTML pages
""")


if __name__ == "__main__":
    main()
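

# Usage note (assuming this script is saved locally as app.py; adjust the filename as needed):
#
#   streamlit run app.py
#
# Downloaded profile pages, ZIP archives, and repo snapshots are written to the
# directories configured above (HTML_DIR, ZIP_DIR, SNAPSHOT_DIR).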