awacke1 committed (verified)
Commit 3da9e90 · 1 Parent(s): f856dfb

Create app.py

Files changed (1)
  1. app.py +220 -0
app.py ADDED
@@ -0,0 +1,220 @@
+ import streamlit as st
+ import requests
+ import base64
+ import os
+ import asyncio
+ from huggingface_hub import HfApi, snapshot_download
+ import plotly.express as px
+ import zipfile
+ import tempfile
+ import shutil
+
+ # Initialize the Hugging Face API
+ api = HfApi()
+
+ # Directories for saving files
+ HTML_DIR = "generated_html_pages"
+ ZIP_DIR = "generated_zips"
+ SNAPSHOT_DIR = "snapshot_downloads"
+
+ for directory in [HTML_DIR, ZIP_DIR, SNAPSHOT_DIR]:
+     if not os.path.exists(directory):
+         os.makedirs(directory)
+
+ # Default list of Hugging Face usernames
+ default_users = {
+     "users": [
+         "awacke1", "rogerxavier", "jonatasgrosman", "kenshinn", "Csplk", "DavidVivancos",
+         "cdminix", "Jaward", "TuringsSolutions", "Severian", "Wauplin",
+         "phosseini", "Malikeh1375", "gokaygokay", "MoritzLaurer", "mrm8488",
+         "TheBloke", "lhoestq", "xw-eric", "Paul", "Muennighoff",
+         "ccdv", "haonan-li", "chansung", "lukaemon", "hails",
+         "pharmapsychotic", "KingNish", "merve", "ameerazam08", "ashleykleynhans"
+     ]
+ }
+
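+ # Fetch a user's models and datasets; the blocking Hub calls run in worker threads so they can be gathered concurrently.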
+ async def fetch_user_content(username):
+     try:
+         models = list(await asyncio.to_thread(api.list_models, author=username))
+         datasets = list(await asyncio.to_thread(api.list_datasets, author=username))
+         return {
+             "username": username,
+             "models": models,
+             "datasets": datasets
+         }
+     except Exception as e:
+         return {"username": username, "error": str(e)}
+
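+ # Download a user's public profile page and save it as a local HTML file; returns (path, None) on success or (None, error).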
+ def download_user_page(username):
+     url = f"https://huggingface.co/{username}"
+     try:
+         response = requests.get(url)
+         response.raise_for_status()
+         html_content = response.text
+         html_file_path = os.path.join(HTML_DIR, f"{username}.html")
+         with open(html_file_path, "w", encoding='utf-8') as html_file:
+             html_file.write(html_content)
+         return html_file_path, None
+     except Exception as e:
+         return None, str(e)
+
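+ # Bundle the given files into a single ZIP archive under ZIP_DIR.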
+ @st.cache_resource
+ def create_zip_of_files(files, zip_name):
+     zip_file_path = os.path.join(ZIP_DIR, zip_name)
+     with zipfile.ZipFile(zip_file_path, 'w') as zipf:
+         for file in files:
+             zipf.write(file, arcname=os.path.basename(file))
+     return zip_file_path
+
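+ # Encode a file as a base64 data URI so it can be offered as an in-page download link.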
+ @st.cache_resource
+ def get_download_link(file_path, link_text):
+     with open(file_path, 'rb') as f:
+         data = f.read()
+     b64 = base64.b64encode(data).decode()
+     return f'<a href="data:application/zip;base64,{b64}" download="{os.path.basename(file_path)}">{link_text}</a>'
+
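+ # Fetch content for all usernames concurrently.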
+ async def fetch_all_users(usernames):
+     tasks = [fetch_user_content(username) for username in usernames]
+     return await asyncio.gather(*tasks)
+
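+ # Download profile pages for a batch of users, collecting saved paths and per-user errors (helper; the UI below calls download_user_page directly).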
+ def get_all_html_files(usernames):
+     html_files = []
+     errors = {}
+     for username in usernames:
+         html_file, error = download_user_page(username)
+         if html_file:
+             html_files.append(html_file)
+         else:
+             errors[username] = error
+     return html_files, errors
+
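+ # Snapshot an entire model or dataset repo into a temp directory, zip it into SNAPSHOT_DIR,
+ # and return (zip_path, None) on success or (None, error message) on failure.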
+ def perform_snapshot_download(repo_id, repo_type):
+     try:
+         temp_dir = tempfile.mkdtemp()
+         snapshot_download(repo_id=repo_id, repo_type=repo_type, local_dir=temp_dir)
+         zip_name = f"{repo_id.replace('/', '_')}_{repo_type}.zip"
+         zip_path = os.path.join(SNAPSHOT_DIR, zip_name)
+         shutil.make_archive(zip_path[:-4], 'zip', temp_dir)
+         shutil.rmtree(temp_dir)
+         return zip_path, None
+     except Exception as e:
+         return None, str(e)
+
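+ # --- Streamlit UI ---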
+ st.title("Hugging Face User Page Downloader & Zipper 📄➕📦")
+
+ user_input = st.text_area(
+     "Enter Hugging Face usernames (one per line):",
+     value="\n".join(default_users["users"]),
+     height=300
+ )
+
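+ # Main flow: fetch each user's models and datasets, mirror their profile page, and offer per-repo snapshot downloads.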
+ if st.button("Show User Content and Download Snapshots"):
+     if user_input:
+         username_list = [username.strip() for username in user_input.split('\n') if username.strip()]
+
+         user_data_list = asyncio.run(fetch_all_users(username_list))
+
+         stats = {"username": [], "models_count": [], "datasets_count": []}
+         successful_html_files = []
+         snapshot_downloads = []
+
+         st.markdown("### User Content Overview")
+         for user_data in user_data_list:
+             username = user_data["username"]
+             with st.container():
+                 st.markdown(f"**{username}** [🔗 Profile](https://huggingface.co/{username})")
+
+                 if "error" in user_data:
+                     st.warning(f"{username}: {user_data['error']} - Something went wrong! ⚠️")
+                 else:
+                     models = user_data["models"]
+                     datasets = user_data["datasets"]
+
+                     html_file_path, download_error = download_user_page(username)
+                     if html_file_path:
+                         successful_html_files.append(html_file_path)
+                         st.success(f"✅ Successfully downloaded {username}'s page.")
+                     else:
+                         st.error(f"❌ Failed to download {username}'s page: {download_error}")
+
+                     stats["username"].append(username)
+                     stats["models_count"].append(len(models))
+                     stats["datasets_count"].append(len(datasets))
+
+                     with st.expander(f"🧠 Models ({len(models)})", expanded=False):
+                         if models:
+                             for model in models:
+                                 model_name = model.modelId.split("/")[-1]
+                                 st.markdown(f"- [{model_name}](https://huggingface.co/{model.modelId})")
+                                 if st.button(f"Download Snapshot: {model_name}", key=f"model_{model.modelId}"):
+                                     with st.spinner(f"Downloading snapshot for {model_name}..."):
+                                         result, snapshot_error = perform_snapshot_download(model.modelId, "model")
+                                         if snapshot_error:
+                                             st.error(f"Failed to download {model_name}: {snapshot_error}")
+                                         else:
+                                             snapshot_downloads.append(result)
+                                             st.success(f"Successfully downloaded snapshot for {model_name}")
+                         else:
+                             st.markdown("No models found. 🤷‍♂️")
+
+                     with st.expander(f"📚 Datasets ({len(datasets)})", expanded=False):
+                         if datasets:
+                             for dataset in datasets:
+                                 dataset_name = dataset.id.split("/")[-1]
+                                 st.markdown(f"- [{dataset_name}](https://huggingface.co/datasets/{dataset.id})")
+                                 if st.button(f"Download Snapshot: {dataset_name}", key=f"dataset_{dataset.id}"):
+                                     with st.spinner(f"Downloading snapshot for {dataset_name}..."):
+                                         result, snapshot_error = perform_snapshot_download(dataset.id, "dataset")
+                                         if snapshot_error:
+                                             st.error(f"Failed to download {dataset_name}: {snapshot_error}")
+                                         else:
+                                             snapshot_downloads.append(result)
+                                             st.success(f"Successfully downloaded snapshot for {dataset_name}")
+                         else:
+                             st.markdown("No datasets found. 🤷‍♀️")
+
+                 st.markdown("---")
+
+         if successful_html_files:
+             html_zip_path = create_zip_of_files(successful_html_files, "HuggingFace_User_Pages.zip")
+             html_download_link = get_download_link(html_zip_path, "📥 Download All HTML Pages as ZIP")
+             st.markdown(html_download_link, unsafe_allow_html=True)
+         else:
+             st.warning("No HTML files were successfully downloaded to create a ZIP archive.")
+
+         if snapshot_downloads:
+             snapshot_zip_path = create_zip_of_files(snapshot_downloads, "HuggingFace_Snapshots.zip")
+             snapshot_download_link = get_download_link(snapshot_zip_path, "📥 Download All Snapshots as ZIP")
+             st.markdown(snapshot_download_link, unsafe_allow_html=True)
+
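+         # Summary charts: number of models and datasets per user.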
+         if stats["username"]:
+             st.markdown("### User Content Statistics")
+
+             fig_models = px.bar(
+                 x=stats["username"],
+                 y=stats["models_count"],
+                 labels={'x': 'Username', 'y': 'Number of Models'},
+                 title="Number of Models per User"
+             )
+             st.plotly_chart(fig_models)
+
+             fig_datasets = px.bar(
+                 x=stats["username"],
+                 y=stats["datasets_count"],
+                 labels={'x': 'Username', 'y': 'Number of Datasets'},
+                 title="Number of Datasets per User"
+             )
+             st.plotly_chart(fig_datasets)
+
+     else:
+         st.warning("Please enter at least one username. Don't be shy! 😅")
+
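+ # Sidebar usage instructions.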
+ st.sidebar.markdown("""
+ ## How to use:
+ 1. The text area is pre-filled with a list of Hugging Face usernames. You can edit this list or add more usernames.
+ 2. Click **'Show User Content and Download Snapshots'**.
+ 3. View each user's models and datasets along with a link to their Hugging Face profile.
+ 4. For each model or dataset, you can click the "Download Snapshot" button to download a snapshot.
+ 5. **Download ZIP archives** containing all the HTML pages and snapshots by clicking the download links.
+ 6. Check out the statistics visualizations below!
+ """)