John6666 committed on
Commit
f360df6
·
verified ·
1 Parent(s): b04cac4

Upload 7 files

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. civitai_to_hf.py +58 -1
  3. utils.py +5 -1
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🤗
4
  colorFrom: yellow
5
  colorTo: blue
6
  sdk: gradio
7
- sdk_version: 5.12.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: yellow
5
  colorTo: blue
6
  sdk: gradio
7
+ sdk_version: 5.17.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
civitai_to_hf.py CHANGED
@@ -1,11 +1,13 @@
1
  import gradio as gr
2
  from huggingface_hub import HfApi, hf_hub_url
 
3
  import os
4
  from pathlib import Path
5
  import gc
6
  import requests
7
  from requests.adapters import HTTPAdapter
8
  from urllib3.util import Retry
 
9
  from utils import (get_token, set_token, is_repo_exists, get_user_agent, get_download_file,
10
  list_uniq, list_sub, duplicate_hf_repo, HF_SUBFOLDER_NAME, get_state, set_state)
11
  import re
@@ -65,6 +67,55 @@ def upload_safetensors_to_repo(filename, repo_id, repo_type, is_private, progres
65
  return url
66
 
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  def is_same_file(filename: str, cmp_sha256: str, cmp_size: int):
69
  if cmp_sha256:
70
  sha256_hash = hashlib.sha256()
@@ -152,16 +203,22 @@ def upload_info_to_repo(dl_url, filename, repo_id, repo_type, is_private, civita
152
  def download_civitai(dl_url, civitai_key, hf_token, urls,
153
  newrepo_id, repo_type="model", is_private=True, is_info=False, is_rename=True, progress=gr.Progress(track_tqdm=True)):
154
  if hf_token: set_token(hf_token)
155
- else: set_token(os.environ.get("HF_TOKEN")) # default huggingface write token
156
  if not civitai_key: civitai_key = os.environ.get("CIVITAI_API_KEY") # default Civitai API key
157
  if not newrepo_id: newrepo_id = os.environ.get("HF_REPO") # default repo to upload
158
  if not get_token() or not civitai_key: raise gr.Error("HF write token and Civitai API key is required.")
159
  if not urls: urls = []
160
  dl_urls = parse_urls(dl_url)
161
  remain_urls = dl_urls.copy()
 
162
  try:
163
  md = f'### Your repo: [{newrepo_id}]({"https://huggingface.co/datasets/" if repo_type == "dataset" else "https://huggingface.co/"}{newrepo_id})\n'
164
  for u in dl_urls:
 
 
 
 
 
165
  file = download_file(u, civitai_key)
166
  if not Path(file).exists() or not Path(file).is_file(): continue
167
  if is_rename: file = get_safe_filename(file, newrepo_id, repo_type)
 
1
  import gradio as gr
2
  from huggingface_hub import HfApi, hf_hub_url
3
+ from huggingface_hub.hf_api import RepoFile
4
  import os
5
  from pathlib import Path
6
  import gc
7
  import requests
8
  from requests.adapters import HTTPAdapter
9
  from urllib3.util import Retry
10
+ import urllib
11
  from utils import (get_token, set_token, is_repo_exists, get_user_agent, get_download_file,
12
  list_uniq, list_sub, duplicate_hf_repo, HF_SUBFOLDER_NAME, get_state, set_state)
13
  import re
 
67
  return url
68
 
69
 
70
def get_repo_hashes(repo_id: str, repo_type: str = "model"):
    """Collect the SHA256 digests of the LFS files in a Hugging Face repo.

    Args:
        repo_id: Repository to inspect.
        repo_type: Repository type passed through to the Hub API.

    Returns:
        A list of SHA256 strings, one per LFS-tracked file found. The list is
        empty when the repo does not exist, and holds whatever was gathered so
        far when a Hub call fails (the error is printed, not raised).
    """
    hf_token = get_token()
    api = HfApi(token=hf_token)
    hashes = []
    try:
        if not api.repo_exists(repo_id=repo_id, repo_type=repo_type, token=hf_token):
            return hashes
        # NOTE(review): list_repo_tree is called without `recursive`, so this
        # presumably only sees the top level of the repo - confirm that files
        # uploaded into subfolders do not need to be covered.
        for entry in api.list_repo_tree(repo_id=repo_id, repo_type=repo_type, token=hf_token):
            # Folders and non-LFS files carry no sha256 to compare against.
            if isinstance(entry, RepoFile) and entry.lfs is not None:
                hashes.append(entry.lfs["sha256"])
    except Exception as e:
        # Best effort: a failed lookup only means duplicates are not detected.
        print(e)
    # Returned after the try block rather than from `finally`, so that
    # KeyboardInterrupt / SystemExit are no longer silently discarded.
    return hashes
84
+
85
+
86
def get_civitai_sha256(dl_url: str, api_key=""):
    """Look up the SHA256 of the file a Civitai download URL refers to.

    The model-version id is taken from the download URL and the version's
    metadata is fetched from the Civitai REST API. Query parameters on the
    download URL (``type``, ``format``, ``size``, ``fp``) are used to filter
    the version's file list; ``type`` defaults to ``"Model"``.

    Args:
        dl_url: A ``https://civitai.com/api/download/models/<id>`` URL,
            optionally followed by a query string.
        api_key: Civitai API key; sent as a bearer token when non-empty.

    Returns:
        The lower-cased SHA256 of the last file entry that passes the filter,
        or ``None`` when the URL is not a Civitai download URL, the request
        fails, the response has no file list, or no entry matches.
    """
    # Import the submodule explicitly: a bare `import urllib` does not
    # guarantee that `urllib.parse` has been loaded.
    from urllib.parse import parse_qs

    def is_invalid_file(query: dict, file_info: dict, key: str):
        # Reject a file only when the URL asks for a value of `key`, the file
        # entry also has one, and the two differ.
        value = file_info.get(key)
        return key in query and value is not None and query[key][0] != value

    # Dots are escaped so only the real host matches. The match also serves as
    # the "is this a Civitai download URL" check.
    m = re.match(r'https://civitai\.com/api/download/models/(\d+)\??(.+)?', dl_url)
    if m is None: return None
    url = 'https://civitai.com/api/v1/model-versions/' + m.group(1)
    # group(2) is None when the URL has no query string.
    qs = parse_qs(m.group(2) or "")
    qs.setdefault("type", ["Model"])

    headers = {'User-Agent': get_user_agent(), 'content-type': 'application/json'}
    # The key is sent bare; the previous f-string wrapped it in literal braces.
    if api_key: headers['Authorization'] = f'Bearer {api_key}'

    retries = Retry(total=5, backoff_factor=1, status_forcelist=[500, 502, 503, 504])
    try:
        # The context manager closes the session and its pooled connections.
        with requests.Session() as session:
            session.mount("https://", HTTPAdapter(max_retries=retries))
            r = session.get(url, headers=headers, timeout=(5.0, 15))
            if not r.ok: return None
            data = dict(r.json())
        files = data.get("files")
        if not isinstance(files, list): return None
        sha256 = None
        for file_info in files:
            if any(is_invalid_file(qs, file_info, k) for k in ("type", "format", "size", "fp")):
                continue
            # Later matches overwrite earlier ones, so the last match wins.
            sha256 = file_info["hashes"]["SHA256"].lower()
        return sha256
    except Exception as e:
        # Best effort: any failure just means the duplicate check is skipped.
        print(e)
        return None
117
+
118
+
119
  def is_same_file(filename: str, cmp_sha256: str, cmp_size: int):
120
  if cmp_sha256:
121
  sha256_hash = hashlib.sha256()
 
203
  def download_civitai(dl_url, civitai_key, hf_token, urls,
204
  newrepo_id, repo_type="model", is_private=True, is_info=False, is_rename=True, progress=gr.Progress(track_tqdm=True)):
205
  if hf_token: set_token(hf_token)
206
+ else: set_token(os.getenv("HF_TOKEN", False)) # default huggingface write token
207
  if not civitai_key: civitai_key = os.environ.get("CIVITAI_API_KEY") # default Civitai API key
208
  if not newrepo_id: newrepo_id = os.environ.get("HF_REPO") # default repo to upload
209
  if not get_token() or not civitai_key: raise gr.Error("HF write token and Civitai API key is required.")
210
  if not urls: urls = []
211
  dl_urls = parse_urls(dl_url)
212
  remain_urls = dl_urls.copy()
213
+ hashes = set(get_repo_hashes(newrepo_id, repo_type))
214
  try:
215
  md = f'### Your repo: [{newrepo_id}]({"https://huggingface.co/datasets/" if repo_type == "dataset" else "https://huggingface.co/"}{newrepo_id})\n'
216
  for u in dl_urls:
217
+ if get_civitai_sha256(u, civitai_key) in hashes:
218
+ print(f"{u} is already exitsts. skipping.")
219
+ remain_urls.remove(u)
220
+ md += f"- Skipped [{str(u)}]({str(u)})\n"
221
+ continue
222
  file = download_file(u, civitai_key)
223
  if not Path(file).exists() or not Path(file).is_file(): continue
224
  if is_rename: file = get_safe_filename(file, newrepo_id, repo_type)
utils.py CHANGED
@@ -15,7 +15,7 @@ def get_token():
15
  try:
16
  token = HfFolder.get_token()
17
  except Exception:
18
- token = ""
19
  return token
20
 
21
 
@@ -170,6 +170,10 @@ def get_download_file(temp_dir, url, civitai_key, progress=gr.Progress(track_tqd
170
  elif Path(f"{temp_dir}/{url.split('/')[-1]}").exists():
171
  print(f"File to download alreday exists: {url}")
172
  new_file = f"{temp_dir}/{url.split('/')[-1]}"
 
 
 
 
173
  else:
174
  print(f"Start downloading: {url}")
175
  recursive = False if "huggingface.co" in url else True
 
15
  try:
16
  token = HfFolder.get_token()
17
  except Exception:
18
+ token = False
19
  return token
20
 
21
 
 
170
  elif Path(f"{temp_dir}/{url.split('/')[-1]}").exists():
171
  print(f"File to download alreday exists: {url}")
172
  new_file = f"{temp_dir}/{url.split('/')[-1]}"
173
+ elif "huggingface.co" in url:
174
+ url = url.replace("?download=true", "")
175
+ if "/blob/" in url: url = url.replace("/blob/", "/resolve/")
176
+ new_file = download_hf_file(temp_dir, url)
177
  else:
178
  print(f"Start downloading: {url}")
179
  recursive = False if "huggingface.co" in url else True