admin committed on
Commit
2463977
·
1 Parent(s): bceef23
Files changed (4) hide show
  1. .gitattributes +10 -11
  2. .gitignore +3 -0
  3. app.py +167 -0
  4. requirements.txt +4 -0
.gitattributes CHANGED
@@ -1,35 +1,34 @@
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
 
4
  *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
  *.model filter=lfs diff=lfs merge=lfs -text
13
  *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
  *.onnx filter=lfs diff=lfs merge=lfs -text
17
  *.ot filter=lfs diff=lfs merge=lfs -text
18
  *.parquet filter=lfs diff=lfs merge=lfs -text
19
  *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
  *.pt filter=lfs diff=lfs merge=lfs -text
23
  *.pth filter=lfs diff=lfs merge=lfs -text
24
  *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
  *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
  *.tflite filter=lfs diff=lfs merge=lfs -text
30
  *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
1
  *.7z filter=lfs diff=lfs merge=lfs -text
2
  *.arrow filter=lfs diff=lfs merge=lfs -text
3
  *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
5
  *.bz2 filter=lfs diff=lfs merge=lfs -text
 
6
  *.ftz filter=lfs diff=lfs merge=lfs -text
7
  *.gz filter=lfs diff=lfs merge=lfs -text
8
  *.h5 filter=lfs diff=lfs merge=lfs -text
9
  *.joblib filter=lfs diff=lfs merge=lfs -text
10
  *.lfs.* filter=lfs diff=lfs merge=lfs -text
 
11
  *.model filter=lfs diff=lfs merge=lfs -text
12
  *.msgpack filter=lfs diff=lfs merge=lfs -text
 
 
13
  *.onnx filter=lfs diff=lfs merge=lfs -text
14
  *.ot filter=lfs diff=lfs merge=lfs -text
15
  *.parquet filter=lfs diff=lfs merge=lfs -text
16
  *.pb filter=lfs diff=lfs merge=lfs -text
 
 
17
  *.pt filter=lfs diff=lfs merge=lfs -text
18
  *.pth filter=lfs diff=lfs merge=lfs -text
19
  *.rar filter=lfs diff=lfs merge=lfs -text
 
20
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
21
  *.tar.* filter=lfs diff=lfs merge=lfs -text
 
22
  *.tflite filter=lfs diff=lfs merge=lfs -text
23
  *.tgz filter=lfs diff=lfs merge=lfs -text
 
24
  *.xz filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *.tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ *.db* filter=lfs diff=lfs merge=lfs -text
29
+ *.ark* filter=lfs diff=lfs merge=lfs -text
30
+ **/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
31
+ **/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
32
+ **/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
33
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
34
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ *.jsonl
2
+ test.py
3
+ flagged/*
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ import requests
5
+ import gradio as gr
6
+ import pandas as pd
7
+ from tqdm import tqdm
8
+ from bs4 import BeautifulSoup
9
+
10
# NOTE(review): this module-level name appears unused — inference() and sync()
# each build their own per-subset '<subset>.jsonl' name and shadow it locally;
# confirm no external caller relies on it before removing.
cache_json = 'cv_backbones.json'
11
+
12
+
13
def parse_url(url):
    """Fetch *url* over HTTP and return the parsed HTML document.

    Args:
        url: Absolute URL of the page to fetch.

    Returns:
        A ``BeautifulSoup`` tree built with the ``html.parser`` backend.

    Raises:
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    # requests.get has NO default timeout; without one a stalled connection
    # would hang the whole crawl forever. 60s is generous for a doc page.
    response = requests.get(url, timeout=60)
    return BeautifulSoup(response.text, 'html.parser')
17
+
18
+
19
def special_type(m_ver):
    """Map a model version string to its canonical backbone family name.

    The family is the leading alphabetic run of *m_ver* (e.g. ``'wide'``
    from ``'wide_resnet50_2'``); a few families are aliased to the
    torchvision module that actually hosts them.
    """
    prefix = re.search('[a-zA-Z]+', m_ver).group(0)
    # Aliases: variants documented under a different torchvision family.
    aliases = {
        'wide': 'resnet',
        'resnext': 'resnet',
        'swin': 'swin_transformer',
        'inception': 'googlenet',
    }
    return aliases.get(prefix, prefix)
32
+
33
+
34
def info_on_dataset(m_ver, m_type, in1k_span):
    """Read one weight entry's URL and input size from the doc markup.

    Starting at *in1k_span* (the ``IMAGENET1K_*`` token), the weight URL
    lives in the next ``span.s2`` sibling (a quoted string literal, so the
    surrounding quotes are stripped) and the input size in the ``span.mi``
    sibling after that.

    Returns:
        ``(record, size_span)`` where *record* is a dict with keys
        ``ver``/``type``/``input_size``/``url`` and *size_span* is handed
        back so the caller can keep scanning siblings from there.
    """
    url_span = in1k_span.find_next_sibling('span', {'class': 's2'})
    size_span = url_span.find_next_sibling('span', {'class': 'mi'})
    record = {
        'ver': m_ver,
        'type': m_type,
        'input_size': int(size_span.text),
        'url': str(url_span.text[1:-1]),  # drop the literal's quotes
    }
    return record, size_span
46
+
47
+
48
def gen_dataframe(url='https://pytorch.org/vision/main/_modules/'):
    """Crawl the torchvision source index and collect backbone metadata.

    Walks every ``torchvision.models.<family>`` module page, finds each
    ``*_Weights`` enum block, and records the IMAGENET1K_V1 (and, when
    present, IMAGENET1K_V2) weight URL and input size.

    Args:
        url: Root of the torchvision ``_modules`` listing to crawl.

    Returns:
        ``{'IMAGENET1K_V1': [...], 'IMAGENET1K_V2': [...]}`` where each
        entry is a dict with keys ``ver``/``type``/``input_size``/``url``.
        Each list is also cached to ``<subset>.jsonl`` in the CWD.
    """
    torch_page = parse_url(url)
    article = torch_page.find('article', {'id': 'pytorch-article'})
    ul = article.find('ul').find('ul')
    in1k_v1, in1k_v2 = [], []

    for li in tqdm(ul.find_all('li'), desc='Crawling cv backbone info...'):
        name = str(li.text)
        # Only leaf model modules: exactly 'torchvision.models.<family>'.
        if 'torchvision.models.' not in name or len(name.split('.')) != 3:
            continue
        # Skip helper/unsupported pages (idiomatic 'in' instead of
        # the original str.__contains__ dunder calls).
        if any(skip in name for skip in ('_api', 'feature_extraction', 'maxvit')):
            continue

        href = li.find('a').get('href')
        model_page = parse_url(url + href)

        for div in model_page.select('div.viewcode-block'):
            div_id = str(div['id'])
            if '_Weights' not in div_id:
                continue
            m_ver = div_id.split('_Weight')[0].lower()
            # swin_v2 weights are not handled downstream.
            if 'swin_v2_' in m_ver:
                continue

            m_type = special_type(m_ver)

            in1k_v1_span = div.find(
                name='span',
                attrs={'class': 'n'},
                string='IMAGENET1K_V1'
            )
            # Entries without V1 weights carry nothing we record.
            if not in1k_v1_span:
                continue

            m_dict, size_span = info_on_dataset(m_ver, m_type, in1k_v1_span)
            in1k_v1.append(m_dict)

            # V2 weights, when present, appear after the V1 size token.
            in1k_v2_span = size_span.find_next_sibling(
                name='span',
                attrs={'class': 'n'},
                string='IMAGENET1K_V2'
            )
            if in1k_v2_span:
                m_dict, _ = info_on_dataset(m_ver, m_type, in1k_v2_span)
                in1k_v2.append(m_dict)

    dataset = {
        'IMAGENET1K_V1': in1k_v1,
        'IMAGENET1K_V2': in1k_v2
    }

    # Cache each subset as JSON Lines so later runs skip the crawl
    # (single loop replaces the two duplicated write-out blocks).
    for subset, items in dataset.items():
        with open(f'{subset}.jsonl', 'w', encoding='utf-8') as jsonl_file:
            for item in items:
                jsonl_file.write(json.dumps(item) + '\n')

    return dataset
121
+
122
+
123
def inference(subset):
    """Return the backbone table for *subset* plus the path to its jsonl.

    Loads ``<subset>.jsonl`` from the working directory when it exists;
    otherwise triggers a fresh crawl via ``gen_dataframe()`` (which also
    writes the cache for next time).

    Returns:
        ``(DataFrame, jsonl_path)`` — the table and the cache filename.
    """
    jsonl_path = f'{subset}.jsonl'
    if os.path.exists(jsonl_path):
        with open(jsonl_path, 'r', encoding='utf-8') as fp:
            records = [json.loads(row) for row in fp]
    else:
        records = gen_dataframe()[subset]

    return pd.DataFrame(records), jsonl_path
132
+
133
+
134
def sync(subset):
    """Delete the cached ``<subset>.jsonl`` file, if any.

    Returns ``None`` so the bound Gradio File component is cleared.
    """
    target = f'{subset}.jsonl'
    try:
        os.remove(target)
    except FileNotFoundError:
        # Nothing cached — already clean.
        pass
    return None
140
+
141
+
142
# Gradio UI: pick an ImageNet subset, view the crawled backbone table,
# download it as jsonl, or clear the cache to force a re-crawl.
with gr.Blocks() as demo:
    with gr.Row():
        # Subset selector drives both the table and the download file.
        subset_opt = gr.Dropdown(
            choices=['IMAGENET1K_V1', 'IMAGENET1K_V2'],
            value='IMAGENET1K_V1'
        )
        sync_btn = gr.Button("Clean cache")
        dld_file = gr.components.File(label="Download JSON")

    with gr.Row():
        # Columns mirror the record keys produced by info_on_dataset().
        data_frame = gr.Dataframe(
            headers=["ver", "type", "input_size", "url"]
        )

    # Changing the subset runs inference(): refreshes the table and
    # points the File component at the matching jsonl cache.
    subset_opt.change(
        inference,
        inputs=subset_opt,
        outputs=[data_frame, dld_file]
    )
    # sync() deletes the subset's jsonl and returns None, clearing
    # the download slot.
    sync_btn.click(
        sync,
        inputs=subset_opt,
        outputs=dld_file
    )

demo.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ tqdm
3
+ bs4
4
+ requests