Anisha Bhatnagar committed on
Commit
0ce5cd2
·
1 Parent(s): 3ad08b5

download logic

Browse files
Files changed (3) hide show
  1. app.py +5 -0
  2. config/config.yaml +11 -0
  3. utils/file_download.py +23 -7
app.py CHANGED
@@ -29,6 +29,11 @@ cfg = load_config()
29
  download_file_override(cfg.get('interp_space_url'), cfg.get('interp_space_path'))
30
  download_file_override(cfg.get('instances_to_explain_url'), cfg.get('instances_to_explain_path'))
31
  download_file_override(cfg.get('gram2vec_feats_url'), cfg.get('gram2vec_feats_path'))
 
 
 
 
 
32
 
33
  from utils.visualizations import *
34
  from utils.llm_feat_utils import *
 
29
  download_file_override(cfg.get('interp_space_url'), cfg.get('interp_space_path'))
30
  download_file_override(cfg.get('instances_to_explain_url'), cfg.get('instances_to_explain_path'))
31
  download_file_override(cfg.get('gram2vec_feats_url'), cfg.get('gram2vec_feats_path'))
32
+ download_file_override(cfg.get('embeddings_cache_url'), cfg.get('embeddings_cache_path'))
33
+ download_file_override(cfg.get('zoom_cache_url'), cfg.get('zoom_cache_path'))
34
+ download_file_override(cfg.get('region_cache_url'), cfg.get('region_cache_path'))
35
+ download_file_override(cfg.get('tsne_cache_url'), cfg.get('tsne_cache_path'))
36
+ download_file_override(cfg.get('llm_style_features_cache_url'), cfg.get('llm_style_features_cache_path'))
37
 
38
  from utils.visualizations import *
39
  from utils.llm_feat_utils import *
config/config.yaml CHANGED
@@ -6,6 +6,17 @@ interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool
6
  gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
7
  gram2vec_feats_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/gram2vec_feats.csv?download=true"
8
 
 
 
 
 
 
 
 
 
 
 
 
9
  style_feat_clm: "llm_tfidf_weights"
10
  top_k: 10
11
  only_llm_feats: false
 
6
  gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
7
  gram2vec_feats_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/gram2vec_feats.csv?download=true"
8
 
9
+ embeddings_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/embeddings_cache.zip?download=true"
10
+ embeddings_cache_path: "./datasets/embeddings_cache/"
11
+ zoom_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/zoom_cache.zip?download=true"
12
+ zoom_cache_path: "./datasets/zoom_cache/"
13
+ region_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/region_cache.zip?download=true"
14
+ region_cache_path: "./datasets/region_cache/"
15
+ tsne_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/tsne_cache.pkl?download=true"
16
+ tsne_cache_path: "./datasets/tsne_cache.pkl"
17
+ llm_style_features_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/feature_spans_cache.zip?download=true"
18
+ llm_style_features_cache_path: "./datasets/feature_spans_cache/"
19
+
20
  style_feat_clm: "llm_tfidf_weights"
21
  top_k: 10
22
  only_llm_feats: false
utils/file_download.py CHANGED
@@ -46,14 +46,30 @@ def download_file_override(url: str, dest_path: str):
46
  with zipfile.ZipFile(tmp_path, 'r') as z:
47
  z.extractall(tmp_extract_dir)
48
 
49
- # Move *contents* of extracted folder into dest_path
50
- for item in os.listdir(tmp_extract_dir):
51
- src = os.path.join(tmp_extract_dir, item)
52
- dst = os.path.join(dest_path, item)
53
- if os.path.isdir(src):
54
- shutil.move(src, dst)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  else:
56
- shutil.move(src, dst)
 
 
57
 
58
  print(f"Extracted zip contents into '{dest_path}'.")
59
  else:
 
46
  with zipfile.ZipFile(tmp_path, 'r') as z:
47
  z.extractall(tmp_extract_dir)
48
 
49
+ if "cache" not in dest_path:
50
+ # Move *contents* of extracted folder into dest_path
51
+ # cache folders have a different structure, so we skip this step for them
52
+ for item in os.listdir(tmp_extract_dir):
53
+ src = os.path.join(tmp_extract_dir, item)
54
+ dst = os.path.join(dest_path, item)
55
+ if os.path.isdir(src):
56
+ shutil.move(src, dst)
57
+ else:
58
+ shutil.move(src, dst)
59
+ else:
60
+ # processing for cache folders of structure like zoom_cache.zip -> zoom_cache/zoom_cache/*
61
+ # also hold some auto generated macos metadata.
62
+ # Move the entire extracted folder into dest_path
63
+ contents = [x for x in os.listdir(tmp_extract_dir) if not x.startswith('__MACOSX')]
64
+ if len(contents) == 1 and os.path.isdir(os.path.join(tmp_extract_dir, contents[0])):
65
+ # Flatten: Only one top-level dir, move its contents
66
+ only_dir = os.path.join(tmp_extract_dir, contents[0])
67
+ for item in os.listdir(only_dir):
68
+ shutil.move(os.path.join(only_dir, item), os.path.join(dest_path, item))
69
  else:
70
+ # Usual: move everything as-is
71
+ for item in contents:
72
+ shutil.move(os.path.join(tmp_extract_dir, item), os.path.join(dest_path, item))
73
 
74
  print(f"Extracted zip contents into '{dest_path}'.")
75
  else: