Spaces:

AzureModels4AI
/

PeopleModelsDatasets2X

Sleeping

App Files Files Community

awacke1 committed on Sep 17, 2024

Commit

94380fb

verified ·

1 Parent(s): ffa9aed

Create app.py

Browse files

Files changed (1) hide show

app.py +187 -0

app.py ADDED Viewed

	@@ -0,0 +1,187 @@

+import streamlit as st
+import requests
+import base64
+import os
+import asyncio
+from huggingface_hub import HfApi
+import plotly.express as px
+# Shared Hugging Face Hub client used by the fetch helpers below.
+api = HfApi()
+# Directory where downloaded profile pages are written.
+HTML_DIR = "generated_html_pages"
+# exist_ok=True replaces the exists()/makedirs() pair: creating the directory
+# is then safe even if another script run creates it at the same moment.
+os.makedirs(HTML_DIR, exist_ok=True)
+# Default list of Hugging Face usernames pre-filled in the text area.
+default_users = {
+    "users": [
+        "awacke1", "rogerxavier", "jonatasgrosman", "kenshinn", "Csplk", "DavidVivancos",
+        "cdminix", "Jaward", "TuringsSolutions", "Severian", "Wauplin",
+        "phosseini", "Malikeh1375", "gokaygokay", "MoritzLaurer", "mrm8488",
+        "TheBloke", "lhoestq", "xw-eric", "Paul", "Muennighoff",
+        "ccdv", "haonan-li", "chansung", "lukaemon", "hails",
+        "pharmapsychotic", "KingNish", "merve", "ameerazam08", "ashleykleynhans"
+    ]
+}
+async def fetch_user_content(username):
+    """Fetch the models and datasets published by ``username`` on the Hub.
+
+    Args:
+        username: Hugging Face account name passed as ``author`` to the API.
+
+    Returns:
+        ``{"username", "models", "datasets"}`` on success, where the last two
+        are lists, or ``{"username", "error"}`` with the exception message if
+        either listing fails.
+    """
+    def _collect(lister):
+        # list_models/list_datasets hand back lazy, paginated iterators, so
+        # the HTTP requests only happen while iterating. Draining the iterator
+        # here keeps that blocking work inside the worker thread instead of on
+        # the event loop.
+        return list(lister(author=username))
+
+    try:
+        # The two listings are independent, so run them side by side.
+        models, datasets = await asyncio.gather(
+            asyncio.to_thread(_collect, api.list_models),
+            asyncio.to_thread(_collect, api.list_datasets),
+        )
+    except Exception as e:
+        # Deliberate catch-all: callers render the message per user instead of
+        # aborting the whole page on one bad account.
+        return {"username": username, "error": str(e)}
+    return {
+        "username": username,
+        "models": models,
+        "datasets": datasets
+    }
+async def fetch_all_users(usernames):
+    """Run ``fetch_user_content`` for every name in ``usernames`` concurrently.
+
+    Returns the list produced by ``asyncio.gather``: one result dict per
+    username, in the same order as the input.
+    """
+    pending = (fetch_user_content(name) for name in usernames)
+    return await asyncio.gather(*pending)
+def download_user_page(username, timeout=10):
+    """Download ``https://huggingface.co/<username>`` into ``HTML_DIR``.
+
+    Args:
+        username: Hugging Face account name; also used as the file name stem.
+        timeout: Seconds to wait for the server before giving up.
+
+    Returns:
+        ``(path, None)`` with the path of the saved ``.html`` file on success,
+        or ``(None, message)`` if the name is rejected, the request fails, or
+        the file cannot be written.
+    """
+    # The name comes from a free-text field and becomes part of a file path,
+    # so refuse anything that could escape HTML_DIR.
+    if not username or any(sep in username for sep in ("/", "\\", "\0")):
+        return None, f"Invalid username: {username!r}"
+    url = f"https://huggingface.co/{username}"
+    html_file_path = os.path.join(HTML_DIR, f"{username}.html")
+    try:
+        # Without a timeout requests waits indefinitely on a stalled server.
+        response = requests.get(url, timeout=timeout)
+        response.raise_for_status()
+        with open(html_file_path, "w", encoding='utf-8') as html_file:
+            html_file.write(response.text)
+    except Exception as e:
+        # Deliberate catch-all: the caller shows the message next to the user.
+        return None, str(e)
+    return html_file_path, None
+def encode_html_to_base64(html_file_path):
+    """Return the base64 text of the file at ``html_file_path``.
+
+    The file is read as raw bytes. The result is ``(encoded_text, None)`` on
+    success, or ``(None, message)`` if reading or encoding raises.
+    """
+    try:
+        with open(html_file_path, "rb") as source:
+            raw = source.read()
+        payload = base64.b64encode(raw).decode('utf-8')
+    except Exception as exc:
+        return None, str(exc)
+    return payload, None
+# Only successful results are cached. The cached helper raises on failure
+# because st.cache_data stores return values, not exceptions; returning an
+# error tuple from the cached function would pin a transient failure for the
+# whole TTL.
+@st.cache_data(show_spinner=False, ttl=3600)
+def _download_and_encode(username):
+    """Download and base64-encode one profile page; raise RuntimeError on failure."""
+    html_file_path, error = download_user_page(username)
+    if html_file_path is None:
+        raise RuntimeError(error or "download failed")
+    encoded_str, encode_error = encode_html_to_base64(html_file_path)
+    # Compare against None: an empty page encodes to "", which is still valid.
+    if encoded_str is None:
+        raise RuntimeError(encode_error or "encoding failed")
+    return encoded_str
+
+
+def get_cached_base64_html(username):
+    """Return the base64-encoded profile page for ``username``.
+
+    Successful results are reused for up to an hour; failures are not cached,
+    so the next call retries the download.
+
+    Returns:
+        ``(encoded_text, None)`` on success, ``(None, message)`` on failure.
+    """
+    try:
+        return _download_and_encode(username), None
+    except RuntimeError as e:
+        return None, str(e)
+# Streamlit app setup
+st.title("Hugging Face User Page Downloader 📄✨")
+# Text area with default list of usernames
+user_input = st.text_area(
+    "Enter Hugging Face usernames (one per line):",
+    value="\n".join(default_users["users"]),
+    height=300
+)
+# Show User Content button
+if st.button("Show User Content"):
+    # Drop blank lines and repeated names (first occurrence wins, order kept).
+    # A repeated name would render two identical download buttons, which
+    # Streamlit rejects as duplicate widgets.
+    username_list = list(dict.fromkeys(
+        line.strip() for line in (user_input or "").split('\n') if line.strip()
+    ))
+    if not username_list:
+        # Covers whitespace-only input as well as an empty box.
+        st.warning("Please enter at least one username. Don't be shy! 😅")
+    else:
+        # Fetch every user in a single event loop so the Hub requests overlap
+        # instead of running one user at a time.
+        with st.spinner("Fetching user content..."):
+            all_user_data = asyncio.run(fetch_all_users(username_list))
+        # Collect statistics for Plotly graphs
+        stats = {"username": [], "models_count": [], "datasets_count": []}
+        st.markdown("### User Content Overview")
+        # Results come back in the same order as username_list.
+        for username, user_data in zip(username_list, all_user_data):
+            with st.container():
+                # Profile link
+                st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
+                if "error" in user_data:
+                    st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
+                else:
+                    models = user_data["models"]
+                    datasets = user_data["datasets"]
+                    # Encode the downloaded HTML page to base64
+                    base64_html, encode_error = get_cached_base64_html(username)
+                    if base64_html:
+                        # Provide a download link for the base64-encoded HTML
+                        b64_filename = f"{username}_base64.txt"
+                        st.download_button(
+                            label=f"📥 Download {username}'s Base64 Encoded HTML",
+                            data=base64_html,
+                            file_name=b64_filename,
+                            mime="text/plain"
+                        )
+                    else:
+                        st.error(f"Failed to encode HTML for {username}: {encode_error}")
+                    # Add to statistics
+                    stats["username"].append(username)
+                    stats["models_count"].append(len(models))
+                    stats["datasets_count"].append(len(datasets))
+                    # Display models
+                    with st.expander(f"🧠 Models ({len(models)})", expanded=False):
+                        if models:
+                            for model in models:
+                                model_name = model.modelId.split("/")[-1]
+                                st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
+                        else:
+                            st.markdown("No models found. 🤷‍♂️")
+                    # Display datasets
+                    with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
+                        if datasets:
+                            for dataset in datasets:
+                                dataset_name = dataset.id.split("/")[-1]
+                                st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
+                        else:
+                            st.markdown("No datasets found. 🤷‍♀️")
+                st.markdown("---")
+        # Plotly graphs to visualize the number of models and datasets each user has
+        if stats["username"]:
+            st.markdown("### User Content Statistics")
+            # Number of models per user
+            fig_models = px.bar(
+                x=stats["username"],
+                y=stats["models_count"],
+                labels={'x': 'Username', 'y': 'Number of Models'},
+                title="Number of Models per User"
+            )
+            st.plotly_chart(fig_models)
+            # Number of datasets per user
+            fig_datasets = px.bar(
+                x=stats["username"],
+                y=stats["datasets_count"],
+                labels={'x': 'Username', 'y': 'Number of Datasets'},
+                title="Number of Datasets per User"
+            )
+            st.plotly_chart(fig_datasets)
+# Sidebar instructions
+st.sidebar.markdown("""
+## How to use:
+1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
+2. Click **'Show User Content'**.
+3. View each user's models and datasets along with a link to their Hugging Face profile.
+4. **Download a base64-encoded HTML page** for each user by clicking the download button.
+5. Check out the statistics visualizations below!
+""")