admin committed · Commit 1efa705 · 1 Parent(s): 95df7de

upd gradio
README.md CHANGED

@@ -4,7 +4,7 @@
 colorFrom: pink
 colorTo: indigo
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.44.1
 app_file: app.py
 pinned: false
 license: mit
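On Spaces, the sdk_version field in the README front matter pins the Gradio release that gets installed for the app. As a sketch (not part of this commit), one way to confirm the runtime matches the new pin from inside the Space:

import gradio as gr

# Should hold once the Space rebuilds against the updated pin.
assert gr.__version__ == "4.44.1", f"expected 4.44.1, got {gr.__version__}"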
app.py CHANGED

@@ -7,123 +7,105 @@ import pandas as pd
 from tqdm import tqdm
 from bs4 import BeautifulSoup
 
-cache_json = 'cv_backbones.json'
+cache_json = "cv_backbones.json"
 
 
 def parse_url(url):
     response = requests.get(url)
     html = response.text
-    return BeautifulSoup(html, 'html.parser')
+    return BeautifulSoup(html, "html.parser")
 
 
 def special_type(m_ver):
-    m_type = re.search('[a-zA-Z]+', m_ver).group(0)
+    m_type = re.search("[a-zA-Z]+", m_ver).group(0)
 
-    if m_type == 'wide' or m_type == 'resnext':
-        return 'resnet'
+    if m_type == "wide" or m_type == "resnext":
+        return "resnet"
 
-    elif m_type == 'swin':
-        return 'swin_transformer'
+    elif m_type == "swin":
+        return "swin_transformer"
 
-    elif m_type == 'inception':
-        return 'googlenet'
+    elif m_type == "inception":
+        return "googlenet"
 
     return m_type
 
 
 def info_on_dataset(m_ver, m_type, in1k_span):
-    url_span = in1k_span.find_next_sibling('span', {'class': 's2'})
-    size_span = url_span.find_next_sibling('span', {'class': 'mi'})
+    url_span = in1k_span.find_next_sibling("span", {"class": "s2"})
+    size_span = url_span.find_next_sibling("span", {"class": "mi"})
     m_url = str(url_span.text[1:-1])
     input_size = int(size_span.text)
-    m_dict = {
-        'ver': m_ver,
-        'type': m_type,
-        'input_size': input_size,
-        'url': m_url
-    }
+    m_dict = {"ver": m_ver, "type": m_type, "input_size": input_size, "url": m_url}
     return m_dict, size_span
 
 
-def gen_dataframe(url='https://pytorch.org/vision/main/_modules/'):
+def gen_dataframe(url="https://pytorch.org/vision/main/_modules/"):
     torch_page = parse_url(url)
-    article = torch_page.find('article', {'id': 'pytorch-article'})
-    ul = article.find('ul').find('ul')
+    article = torch_page.find("article", {"id": "pytorch-article"})
+    ul = article.find("ul").find("ul")
     in1k_v1, in1k_v2 = [], []
 
-    for li in tqdm(ul.find_all('li'), desc='Crawling cv backbone info...'):
+    for li in tqdm(ul.find_all("li"), desc="Crawling cv backbone info..."):
         name = str(li.text)
-        if name.__contains__('torchvision.models.') and len(name.split('.')) == 3:
+        if name.__contains__("torchvision.models.") and len(name.split(".")) == 3:
 
-            if name.__contains__('_api') or \
-                    name.__contains__('feature_extraction') or \
-                    name.__contains__('maxvit'):
+            if (
+                name.__contains__("_api")
+                or name.__contains__("feature_extraction")
+                or name.__contains__("maxvit")
+            ):
                 continue
 
-            href = li.find('a').get('href')
+            href = li.find("a").get("href")
             model_page = parse_url(url + href)
-            divs = model_page.select('div.viewcode-block')
+            divs = model_page.select("div.viewcode-block")
 
             for div in divs:
-                div_id = str(div['id'])
-                if div_id.__contains__('_Weights'):
-                    m_ver = div_id.split('_Weight')[0].lower()
+                div_id = str(div["id"])
+                if div_id.__contains__("_Weights"):
+                    m_ver = div_id.split("_Weight")[0].lower()
 
-                    if m_ver.__contains__('swin_v2_'):
+                    if m_ver.__contains__("swin_v2_"):
                         continue
 
                     m_type = special_type(m_ver)
 
                     in1k_v1_span = div.find(
-                        name='span',
-                        attrs={'class': 'n'},
-                        string='IMAGENET1K_V1'
+                        name="span", attrs={"class": "n"}, string="IMAGENET1K_V1"
                     )
 
                     if not in1k_v1_span:
                         continue
 
-                    m_dict, size_span = info_on_dataset(
-                        m_ver,
-                        m_type,
-                        in1k_v1_span
-                    )
+                    m_dict, size_span = info_on_dataset(m_ver, m_type, in1k_v1_span)
                     in1k_v1.append(m_dict)
 
                     in1k_v2_span = size_span.find_next_sibling(
-                        name='span',
-                        attrs={'class': 'n'},
-                        string='IMAGENET1K_V2'
+                        name="span", attrs={"class": "n"}, string="IMAGENET1K_V2"
                     )
 
                     if in1k_v2_span:
-                        m_dict, _ = info_on_dataset(
-                            m_ver,
-                            m_type,
-                            in1k_v2_span
-                        )
+                        m_dict, _ = info_on_dataset(m_ver, m_type, in1k_v2_span)
                         in1k_v2.append(m_dict)
 
-    dataset = {
-        'IMAGENET1K_V1': in1k_v1,
-        'IMAGENET1K_V2': in1k_v2
-    }
+    dataset = {"IMAGENET1K_V1": in1k_v1, "IMAGENET1K_V2": in1k_v2}
 
-    with open('IMAGENET1K_V1.jsonl', 'w', encoding='utf-8') as jsonl_file:
+    with open("IMAGENET1K_V1.jsonl", "w", encoding="utf-8") as jsonl_file:
         for item in in1k_v1:
-            jsonl_file.write(json.dumps(item) + '\n')
+            jsonl_file.write(json.dumps(item) + "\n")
 
-    with open('IMAGENET1K_V2.jsonl', 'w', encoding='utf-8') as jsonl_file:
+    with open("IMAGENET1K_V2.jsonl", "w", encoding="utf-8") as jsonl_file:
         for item in in1k_v2:
-            jsonl_file.write(json.dumps(item) + '\n')
+            jsonl_file.write(json.dumps(item) + "\n")
 
     return dataset
 
 
 def inference(subset):
-    cache_json = f'{subset}.jsonl'
+    cache_json = f"{subset}.jsonl"
     if os.path.exists(cache_json):
-        with open(cache_json, 'r', encoding='utf-8') as jsonl_file:
+        with open(cache_json, "r", encoding="utf-8") as jsonl_file:
             dataset = [json.loads(line) for line in jsonl_file]
     else:
        dataset = gen_dataframe()[subset]
@@ -132,7 +114,7 @@ def inference(subset):
 
 
 def sync(subset):
-    cache_json = f'{subset}.jsonl'
+    cache_json = f"{subset}.jsonl"
     if os.path.exists(cache_json):
         os.remove(cache_json)
 
@@ -142,26 +124,15 @@ def sync(subset):
 with gr.Blocks() as demo:
     with gr.Row():
         subset_opt = gr.Dropdown(
-            choices=['IMAGENET1K_V1', 'IMAGENET1K_V2'],
-            value='IMAGENET1K_V1'
+            choices=["IMAGENET1K_V1", "IMAGENET1K_V2"], value="IMAGENET1K_V1"
         )
         sync_btn = gr.Button("Clean cache")
         dld_file = gr.components.File(label="Download JSON lines")
 
     with gr.Row():
-        data_frame = gr.Dataframe(
-            headers=['ver', 'type', 'input_size', 'url']
-        )
+        data_frame = gr.Dataframe(headers=["ver", "type", "input_size", "url"])
 
-    subset_opt.change(
-        inference,
-        inputs=subset_opt,
-        outputs=[data_frame, dld_file]
-    )
-    sync_btn.click(
-        sync,
-        inputs=subset_opt,
-        outputs=dld_file
-    )
+    subset_opt.change(inference, inputs=subset_opt, outputs=[data_frame, dld_file])
+    sync_btn.click(sync, inputs=subset_opt, outputs=dld_file)
 
-demo.launch(share=True)
+demo.launch()
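For reference, each record the crawler writes is one JSON object per line with the keys ver, type, input_size, and url, so the cache files load straight back into pandas (which app.py already imports). A minimal sketch, not part of the commit, assuming gen_dataframe() has already produced IMAGENET1K_V1.jsonl:

import json

import pandas as pd

# Read the JSON-lines cache written by gen_dataframe().
with open("IMAGENET1K_V1.jsonl", "r", encoding="utf-8") as jsonl_file:
    rows = [json.loads(line) for line in jsonl_file]

df = pd.DataFrame(rows)  # columns: ver, type, input_size, url
print(df.head())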