Spaces:
Running
Running
Upload 7 files
Browse files
- README.md +1 -1
- civitai_to_hf.py +58 -1
- utils.py +5 -1
README.md
CHANGED
@@ -4,7 +4,7 @@ emoji: 🤗
|
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
4 |
colorFrom: yellow
|
5 |
colorTo: blue
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.17.1
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
civitai_to_hf.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import HfApi, hf_hub_url
|
|
|
3 |
import os
|
4 |
from pathlib import Path
|
5 |
import gc
|
6 |
import requests
|
7 |
from requests.adapters import HTTPAdapter
|
8 |
from urllib3.util import Retry
|
|
|
9 |
from utils import (get_token, set_token, is_repo_exists, get_user_agent, get_download_file,
|
10 |
list_uniq, list_sub, duplicate_hf_repo, HF_SUBFOLDER_NAME, get_state, set_state)
|
11 |
import re
|
@@ -65,6 +67,55 @@ def upload_safetensors_to_repo(filename, repo_id, repo_type, is_private, progres
|
|
65 |
return url
|
66 |
|
67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
def is_same_file(filename: str, cmp_sha256: str, cmp_size: int):
|
69 |
if cmp_sha256:
|
70 |
sha256_hash = hashlib.sha256()
|
@@ -152,16 +203,22 @@ def upload_info_to_repo(dl_url, filename, repo_id, repo_type, is_private, civita
|
|
152 |
def download_civitai(dl_url, civitai_key, hf_token, urls,
|
153 |
newrepo_id, repo_type="model", is_private=True, is_info=False, is_rename=True, progress=gr.Progress(track_tqdm=True)):
|
154 |
if hf_token: set_token(hf_token)
|
155 |
-
else: set_token(os.
|
156 |
if not civitai_key: civitai_key = os.environ.get("CIVITAI_API_KEY") # default Civitai API key
|
157 |
if not newrepo_id: newrepo_id = os.environ.get("HF_REPO") # default repo to upload
|
158 |
if not get_token() or not civitai_key: raise gr.Error("HF write token and Civitai API key is required.")
|
159 |
if not urls: urls = []
|
160 |
dl_urls = parse_urls(dl_url)
|
161 |
remain_urls = dl_urls.copy()
|
|
|
162 |
try:
|
163 |
md = f'### Your repo: [{newrepo_id}]({"https://huggingface.co/datasets/" if repo_type == "dataset" else "https://huggingface.co/"}{newrepo_id})\n'
|
164 |
for u in dl_urls:
|
|
|
|
|
|
|
|
|
|
|
165 |
file = download_file(u, civitai_key)
|
166 |
if not Path(file).exists() or not Path(file).is_file(): continue
|
167 |
if is_rename: file = get_safe_filename(file, newrepo_id, repo_type)
|
|
|
1 |
import gradio as gr
|
2 |
from huggingface_hub import HfApi, hf_hub_url
|
3 |
+
from huggingface_hub.hf_api import RepoFile
|
4 |
import os
|
5 |
from pathlib import Path
|
6 |
import gc
|
7 |
import requests
|
8 |
from requests.adapters import HTTPAdapter
|
9 |
from urllib3.util import Retry
|
10 |
+
import urllib
|
11 |
from utils import (get_token, set_token, is_repo_exists, get_user_agent, get_download_file,
|
12 |
list_uniq, list_sub, duplicate_hf_repo, HF_SUBFOLDER_NAME, get_state, set_state)
|
13 |
import re
|
|
|
67 |
return url
|
68 |
|
69 |
|
70 |
+
def get_repo_hashes(repo_id: str, repo_type: str="model") -> list:
    """Collect the SHA-256 digests of the LFS files listed for a Hub repo.

    The repo is queried with the token returned by ``get_token()``. Only
    entries that are ``RepoFile`` instances carrying LFS metadata contribute
    a hash; everything else in the listing is ignored.

    Args:
        repo_id: Identifier of the repository to inspect.
        repo_type: Repository type passed through to the Hub API.

    Returns:
        A list of ``lfs["sha256"]`` values. The list is empty when the repo
        does not exist, and holds whatever was gathered so far when a Hub
        call fails (the error is printed, not raised).
    """
    hf_token = get_token()
    api = HfApi(token=hf_token)
    hashes = []
    try:
        if not api.repo_exists(repo_id=repo_id, repo_type=repo_type, token=hf_token):
            return hashes
        # NOTE(review): list_repo_tree is called with its default arguments;
        # if the listing is non-recursive by default, files inside subfolders
        # are not considered here - confirm against the huggingface_hub docs.
        for entry in api.list_repo_tree(repo_id=repo_id, repo_type=repo_type, token=hf_token):
            if not isinstance(entry, RepoFile) or entry.lfs is None:
                continue
            hashes.append(entry.lfs["sha256"])
    except Exception as e:
        # Best effort: a failed lookup only disables duplicate detection.
        print(e)
    # Returned after the try block rather than from a `finally` clause: a
    # `return` inside `finally` would also discard KeyboardInterrupt and
    # SystemExit raised while the listing is in progress.
    return hashes
|
84 |
+
|
85 |
+
|
86 |
+
def get_civitai_sha256(dl_url: str, api_key=""):
    """Look up the SHA-256 of the file a Civitai download URL points to.

    The model-version id is taken from ``dl_url`` and the matching
    ``/api/v1/model-versions/<id>`` record is fetched. Each entry of the
    record's ``files`` list is compared against the ``type``, ``format``,
    ``size`` and ``fp`` query parameters of ``dl_url`` (``type`` defaults to
    ``"Model"``); an entry is rejected only when it declares a value for one
    of those keys that differs from the requested one.

    Args:
        dl_url: Download URL expected to start with
            ``https://civitai.com/api/download/models/<digits>``.
        api_key: Optional Civitai API key, sent as a bearer token.

    Returns:
        The lower-cased ``hashes["SHA256"]`` value of the last file entry
        that was not rejected, or ``None`` when the URL is not a Civitai
        download URL, the request fails, the response has no ``files`` list,
        no entry qualifies, or any error occurs while processing (the error
        is printed, not raised).
    """
    # Imported here so that the `urllib.parse` submodule is guaranteed to be
    # loaded; a bare `import urllib` does not promise that.
    from urllib.parse import parse_qs

    def is_invalid_file(qs: dict, info: dict, k: str):
        # Rejected only if the URL asks for `k`, the file entry declares `k`,
        # and the two values differ.
        declared = info.get(k, None)
        return k in qs and declared is not None and qs[k][0] != declared

    # Validate the URL before touching the network or building a session.
    if not dl_url: return None
    m = re.match(r'https://civitai.com/api/download/models/(\d+)\??(.+)?', dl_url)
    if m is None: return None
    url = 'https://civitai.com/api/v1/model-versions/' + m.group(1)
    qs = parse_qs(m.group(2) or "")  # group(2) is None when there is no query string
    if "type" not in qs: qs["type"] = ["Model"]

    headers = {'User-Agent': get_user_agent(), 'content-type': 'application/json'}
    # Plain `Bearer <key>`: the former triple-brace f-string wrapped the key
    # in literal `{}` characters.
    if api_key: headers['Authorization'] = f'Bearer {api_key}'
    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    try:
        # The context manager closes the session and its pooled connections
        # on every exit path.
        with requests.Session() as session:
            session.mount("https://", HTTPAdapter(max_retries=retries))
            r = session.get(url, headers=headers, timeout=(5.0, 15))
            if not r.ok: return None
            data = r.json()
        if not isinstance(data, dict) or not isinstance(data.get("files"), list): return None
        sha256 = None
        for d in data["files"]:
            if any(is_invalid_file(qs, d, k) for k in ("type", "format", "size", "fp")): continue
            # No early exit: when several entries qualify, the last one wins.
            sha256 = d["hashes"]["SHA256"].lower()
        return sha256
    except Exception as e:
        # Best effort: any failure just means "hash unknown".
        print(e)
        return None
|
117 |
+
|
118 |
+
|
119 |
def is_same_file(filename: str, cmp_sha256: str, cmp_size: int):
|
120 |
if cmp_sha256:
|
121 |
sha256_hash = hashlib.sha256()
|
|
|
203 |
def download_civitai(dl_url, civitai_key, hf_token, urls,
|
204 |
newrepo_id, repo_type="model", is_private=True, is_info=False, is_rename=True, progress=gr.Progress(track_tqdm=True)):
|
205 |
if hf_token: set_token(hf_token)
|
206 |
+
else: set_token(os.getenv("HF_TOKEN", False)) # default huggingface write token
|
207 |
if not civitai_key: civitai_key = os.environ.get("CIVITAI_API_KEY") # default Civitai API key
|
208 |
if not newrepo_id: newrepo_id = os.environ.get("HF_REPO") # default repo to upload
|
209 |
if not get_token() or not civitai_key: raise gr.Error("HF write token and Civitai API key is required.")
|
210 |
if not urls: urls = []
|
211 |
dl_urls = parse_urls(dl_url)
|
212 |
remain_urls = dl_urls.copy()
|
213 |
+
hashes = set(get_repo_hashes(newrepo_id, repo_type))
|
214 |
try:
|
215 |
md = f'### Your repo: [{newrepo_id}]({"https://huggingface.co/datasets/" if repo_type == "dataset" else "https://huggingface.co/"}{newrepo_id})\n'
|
216 |
for u in dl_urls:
|
217 |
+
if get_civitai_sha256(u, civitai_key) in hashes:
|
218 |
+
print(f"{u} is already exitsts. skipping.")
|
219 |
+
remain_urls.remove(u)
|
220 |
+
md += f"- Skipped [{str(u)}]({str(u)})\n"
|
221 |
+
continue
|
222 |
file = download_file(u, civitai_key)
|
223 |
if not Path(file).exists() or not Path(file).is_file(): continue
|
224 |
if is_rename: file = get_safe_filename(file, newrepo_id, repo_type)
|
utils.py
CHANGED
@@ -15,7 +15,7 @@ def get_token():
|
|
15 |
try:
|
16 |
token = HfFolder.get_token()
|
17 |
except Exception:
|
18 |
-
token =
|
19 |
return token
|
20 |
|
21 |
|
@@ -170,6 +170,10 @@ def get_download_file(temp_dir, url, civitai_key, progress=gr.Progress(track_tqd
|
|
170 |
elif Path(f"{temp_dir}/{url.split('/')[-1]}").exists():
|
171 |
print(f"File to download alreday exists: {url}")
|
172 |
new_file = f"{temp_dir}/{url.split('/')[-1]}"
|
|
|
|
|
|
|
|
|
173 |
else:
|
174 |
print(f"Start downloading: {url}")
|
175 |
recursive = False if "huggingface.co" in url else True
|
|
|
15 |
try:
|
16 |
token = HfFolder.get_token()
|
17 |
except Exception:
|
18 |
+
token = False
|
19 |
return token
|
20 |
|
21 |
|
|
|
170 |
elif Path(f"{temp_dir}/{url.split('/')[-1]}").exists():
|
171 |
print(f"File to download alreday exists: {url}")
|
172 |
new_file = f"{temp_dir}/{url.split('/')[-1]}"
|
173 |
+
elif "huggingface.co" in url:
|
174 |
+
url = url.replace("?download=true", "")
|
175 |
+
if "/blob/" in url: url = url.replace("/blob/", "/resolve/")
|
176 |
+
new_file = download_hf_file(temp_dir, url)
|
177 |
else:
|
178 |
print(f"Start downloading: {url}")
|
179 |
recursive = False if "huggingface.co" in url else True
|