Commit 7d3b3b8
MonkeyJuice committed
1 Parent(s): c410097

change lib
Files changed:
- .pre-commit-config.yaml +0 -36
- .vscode/settings.json +23 -11
- README.md +4 -4
- app.py +33 -35
- createTagDom.py +20 -0
- cropImage.py +32 -0
- genTag.py +114 -41
- ignoreTag.txt +22 -15
- ignoreTag2.txt +2 -1
- requirements.txt +4 -4
- script.js +45 -21
- style.css +20 -7
.pre-commit-config.yaml
DELETED
@@ -1,36 +0,0 @@
-repos:
-- repo: https://github.com/pre-commit/pre-commit-hooks
-  rev: v4.2.0
-  hooks:
-  - id: check-executables-have-shebangs
-  - id: check-json
-  - id: check-merge-conflict
-  - id: check-shebang-scripts-are-executable
-  - id: check-toml
-  - id: check-yaml
-  - id: double-quote-string-fixer
-  - id: end-of-file-fixer
-  - id: mixed-line-ending
-    args: ['--fix=lf']
-  - id: requirements-txt-fixer
-  - id: trailing-whitespace
-- repo: https://github.com/myint/docformatter
-  rev: v1.4
-  hooks:
-  - id: docformatter
-    args: ['--in-place']
-- repo: https://github.com/pycqa/isort
-  rev: 5.12.0
-  hooks:
-  - id: isort
-- repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.991
-  hooks:
-  - id: mypy
-    args: ['--ignore-missing-imports']
-    additional_dependencies: ['types-python-slugify']
-- repo: https://github.com/google/yapf
-  rev: v0.32.0
-  hooks:
-  - id: yapf
-    args: ['--parallel', '--in-place']
.vscode/settings.json
CHANGED
@@ -1,18 +1,30 @@
 {
-    "…
-    "…
-    "python.linting.pylintEnabled": false,
-    "python.linting.lintOnSave": true,
-    "python.formatting.provider": "yapf",
-    "python.formatting.yapfArgs": [
-        "--style={based_on_style: pep8, indent_width: 4, blank_line_before_nested_class_or_def: false, spaces_before_comment: 2, split_before_logical_operator: true}"
-    ],
+    "editor.formatOnSave": true,
+    "files.insertFinalNewline": false,
     "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
         "editor.formatOnType": true,
         "editor.codeActionsOnSave": {
-            "source.organizeImports": …
+            "source.organizeImports": "explicit"
         }
     },
-    "…
-    …
+    "[jupyter]": {
+        "files.insertFinalNewline": false
+    },
+    "black-formatter.args": [
+        "--line-length=119"
+    ],
+    "isort.args": ["--profile", "black"],
+    "flake8.args": [
+        "--max-line-length=119"
+    ],
+    "ruff.lint.args": [
+        "--line-length=119"
+    ],
+    "notebook.output.scrolling": true,
+    "notebook.formatOnCellExecution": true,
+    "notebook.formatOnSave.enabled": true,
+    "notebook.codeActionsOnSave": {
+        "source.organizeImports": "explicit"
+    }
 }
README.md
CHANGED
@@ -1,10 +1,10 @@
 ---
-title: …
-emoji: …
+title: Tagger
+emoji: 👀
 colorFrom: gray
 colorTo: purple
 sdk: gradio
-sdk_version: 4.…
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
----
+---
app.py
CHANGED
@@ -6,35 +6,34 @@ import gradio as gr
 import PIL.Image
 import zipfile
 from genTag import genTag
+from cropImage import cropImage
 from checkIgnore import is_ignore
+from createTagDom import create_tag_dom
 
-def predict(image: PIL.Image.Image…
-    result_threshold = genTag(image, …
+def predict(image: PIL.Image.Image):
+    result_threshold = genTag(image, 0.5)
     result_html = ''
     for label, prob in result_threshold.items():
-        …
-            result_html += '<p class="m5dd_list">'
-        else:
-            result_html += '<p class="m5dd_list use">'
-        result_html = result_html + '<span>' + str(label) + '</span><span>' + str(round(prob, 3)) + '</span></p>'
+        result_html += create_tag_dom(label, is_ignore(label, 1), prob)
     result_html = '<div>' + result_html + '</div>'
     result_filter = {key: value for key, value in result_threshold.items() if not is_ignore(key, 1)}
     result_text = '<div id="m5dd_result">' + ', '.join(result_filter.keys()) + '</div>'
-    …
+    crop_image = cropImage(image)
+    return result_html, result_text, crop_image
 
-def predict_batch(zip_file, …
+def predict_batch(zip_file, progress=gr.Progress()):
     result = ''
     with zipfile.ZipFile(zip_file) as zf:
         for file in progress.tqdm(zf.namelist()):
             print(file)
-            if file.endswith(".png") or file.endswith(".jpg"):
+            if file.endswith(".png") or file.endswith(".jpg") or file.endswith(".jpeg"):
                 image_file = zf.open(file)
                 image = PIL.Image.open(image_file)
-                image = image.convert("…
-                result_threshold = genTag(image, …
+                image = image.convert("RGBA")
+                result_threshold = genTag(image, 0.5)
                 result_filter = {key: value for key, value in result_threshold.items() if not is_ignore(key, 2)}
                 tag = ', '.join(result_filter.keys())
-                result = result + str(file) + '\n' + str(tag) + '\n'
+                result = result + str(file) + '\n' + str(tag) + '\n\n'
     return result
 
 with gr.Blocks(css="style.css", js="script.js") as demo:
@@ -43,45 +42,44 @@ with gr.Blocks(css="style.css", js="script.js") as demo:
             with gr.Column(scale=1):
                 image = gr.Image(label='Upload a image',
                                  type='pil',
-                                 …
-                …
-                …
-                                 minimum=0,
-                                 maximum=1,
-                                 step=0.05,
-                                 value=0.5)
+                                 elem_classes='m5dd_image',
+                                 image_mode="RGBA",
+                                 sources=["upload", "clipboard"])
                 run_button = gr.Button('Run')
-                …
+                with gr.Accordion(label="Crop Image", open=False):
+                    crop_image = gr.Image(elem_classes='m5dd_image2',
+                                          format='jpg',
+                                          show_label=False,
+                                          show_share_button=False,
+                                          container=False)
+                result_text = gr.HTML(value="")
             with gr.Column(scale=2):
-                result_html = gr.HTML(value="…
+                result_html = gr.HTML(value="")
     with gr.Tab(label='Batch'):
         with gr.Row():
             with gr.Column(scale=1):
                 batch_file = gr.File(label="Upload a ZIP file containing images",
-                                     file_types=['.zip'],
-                                     height='20em')
-                score_threshold2 = gr.Slider(label='Score threshold',
-                                             minimum=0,
-                                             maximum=1,
-                                             step=0.05,
-                                             value=0.5)
+                                     file_types=['.zip'])
                 run_button2 = gr.Button('Run')
             with gr.Column(scale=2):
-                result_text2 = gr.Textbox(lines=…
+                result_text2 = gr.Textbox(lines=20,
+                                          max_lines=20,
                                           label='Result',
-                                          show_copy_button=True…
+                                          show_copy_button=True,
+                                          autoscroll=False)
 
     run_button.click(
         fn=predict,
-        inputs=[image…
-        outputs=[result_html, result_text],
+        inputs=[image],
+        outputs=[result_html, result_text, crop_image],
        api_name='predict',
     )
     run_button2.click(
         fn=predict_batch,
-        inputs=[batch_file…
+        inputs=[batch_file],
         outputs=[result_text2],
         api_name='predict_batch',
     )
 
-…
+if __name__ == "__main__":
+    demo.queue(max_size=20).launch()
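For reference, a minimal sketch (not part of this commit) of calling the reworked endpoints with gradio_client; the Space id below is a placeholder. After this change, /predict takes only the image and returns three outputs (tag HTML, tag text, cropped image), and /predict_batch takes only the ZIP, since the score-threshold sliders were removed in favour of a hardcoded 0.5:

# Sketch only; "user/space-name" is a placeholder Space id.
from gradio_client import Client, handle_file

client = Client("user/space-name")

# /predict: one image in, (tag html, tag text, cropped image) out
html, text, crop = client.predict(handle_file("sample.png"), api_name="/predict")
print(text)

# /predict_batch: one ZIP of images in, tag text out
print(client.predict(handle_file("images.zip"), api_name="/predict_batch"))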
createTagDom.py
ADDED
@@ -0,0 +1,20 @@
+#!/usr/bin/env python
+
+from __future__ import annotations
+
+def create_tag_dom(label, ignore, prob):
+    result_html = ''
+    if ignore:
+        result_html += '<div class="m5dd_list">'
+    else:
+        result_html += '<div class="m5dd_list use">'
+    result_html += '<span class="add action">➕</span>'
+    result_html += '<span class="dec action">➖</span>'
+    result_html += '<span class="label action">' + str(label) + '</span>'
+    result_html += '<span class="prob">' + str(round(prob, 3)) + '</span>'
+    result_html += '<span class="up action">🔼</span>'
+    result_html += '<span class="down action">🔽</span>'
+    result_html += '<a class="wiki action" href="https://danbooru.donmai.us/wiki_pages/' + label + '" target="_blank">📙</a>'
+    result_html += '</div>'
+
+    return result_html
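As a quick illustration (not part of the commit), one call to the new helper renders a single row of the tag list, with the ➕/➖/🔼/🔽 action spans and a Danbooru wiki link:

# Example: ignore=False marks the tag as used ("m5dd_list use").
from createTagDom import create_tag_dom

print(create_tag_dom('1girl', False, 0.9876))
# -> <div class="m5dd_list use"><span class="add action">➕</span>…
#    <span class="label action">1girl</span><span class="prob">0.988</span>…</div>  (abridged)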
cropImage.py
ADDED
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+
+from __future__ import annotations
+
+import PIL.Image
+
+def cropImage(image: PIL.Image.Image):
+    original_width, original_height = image.size
+    scale = max(original_width, original_height) / min(original_width, original_height)
+
+    target_width = 512
+    target_height = 768
+
+    if scale < 1.1:
+        target_width = 640
+        target_height = 640
+    elif original_width > original_height:
+        target_width = 768
+        target_height = 512
+
+    if original_width / original_height > target_width / target_height:
+        new_width = int(original_height * (target_width / target_height))
+        crop_box = ((original_width - new_width) // 2, 0, (original_width + new_width) // 2, original_height)
+    else:
+        new_height = int(original_width * (target_height / target_width))
+        crop_box = (0, (original_height - new_height) // 2, original_width, (original_height + new_height) // 2)
+
+    cropped_image = image.convert("RGB")
+    cropped_image = cropped_image.crop(crop_box)
+    cropped_image = cropped_image.resize((target_width, target_height))
+
+    return cropped_image
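A quick worked check of the bucket logic (not part of the commit): cropImage picks a 640x640 bucket when the long/short side ratio is under 1.1, 768x512 for landscape and 512x768 for portrait, centre-cropping to the bucket's aspect ratio before resizing:

# 1080x1000: scale = 1.08 < 1.1 -> 640x640 bucket; crop_box = (40, 0, 1040, 1000)
# 1000x1500: scale = 1.5, portrait -> 512x768 bucket; crop_box = (0, 0, 1000, 1500)
import PIL.Image
from cropImage import cropImage

print(cropImage(PIL.Image.new("RGB", (1080, 1000))).size)  # (640, 640)
print(cropImage(PIL.Image.new("RGB", (1000, 1500))).size)  # (512, 768)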
genTag.py
CHANGED
@@ -2,54 +2,127 @@
 
 from __future__ import annotations
 
-import …
+import gradio as gr
 import huggingface_hub
 import numpy as np
-import …
-import …
+import onnxruntime as rt
+import pandas as pd
+from PIL import Image
 
-…
-…
-…
-    model = tf.keras.models.load_model(path)
-    return model
+EVA02_LARGE_MODEL_DSV3_REPO = "SmilingWolf/wd-eva02-large-tagger-v3"
+MODEL_FILENAME = "model.onnx"
+LABEL_FILENAME = "selected_tags.csv"
 
-…
-…
-…
-…
-    labels = [line.strip() for line in f.readlines()]
-    return labels
+def load_labels(dataframe) -> list[str]:
+    name_series = dataframe["name"]
+    tag_names = name_series.tolist()
+
+    rating_indexes = list(np.where(dataframe["category"] == 9)[0])
+    general_indexes = list(np.where(dataframe["category"] == 0)[0])
+    character_indexes = list(np.where(dataframe["category"] == 4)[0])
+    return tag_names, rating_indexes, general_indexes, character_indexes
 
 
-…
-…
+class Predictor:
+    def __init__(self):
+        self.model_target_size = None
+        self.load_model(EVA02_LARGE_MODEL_DSV3_REPO)
+
+    def download_model(self, model_repo):
+        csv_path = huggingface_hub.hf_hub_download(
+            model_repo,
+            LABEL_FILENAME,
+        )
+        model_path = huggingface_hub.hf_hub_download(
+            model_repo,
+            MODEL_FILENAME,
+        )
+        return csv_path, model_path
+
+    def load_model(self, model_repo):
+        csv_path, model_path = self.download_model(model_repo)
+
+        tags_df = pd.read_csv(csv_path)
+        sep_tags = load_labels(tags_df)
+
+        self.tag_names = sep_tags[0]
+        self.rating_indexes = sep_tags[1]
+        self.general_indexes = sep_tags[2]
+        self.character_indexes = sep_tags[3]
+
+        model = rt.InferenceSession(model_path)
+        _, height, width, _ = model.get_inputs()[0].shape
+        self.model_target_size = height
+
+        self.model = model
+
+    def prepare_image(self, image):
+        target_size = self.model_target_size
+
+        canvas = Image.new("RGBA", image.size, (255, 255, 255))
+        canvas.alpha_composite(image)
+        image = canvas.convert("RGB")
+
+        # Pad image to square
+        image_shape = image.size
+        max_dim = max(image_shape)
+        pad_left = (max_dim - image_shape[0]) // 2
+        pad_top = (max_dim - image_shape[1]) // 2
+
+        padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
+        padded_image.paste(image, (pad_left, pad_top))
+
+        # Resize
+        if max_dim != target_size:
+            padded_image = padded_image.resize(
+                (target_size, target_size),
+                Image.BICUBIC,
+            )
+
+        # Convert to numpy array
+        image_array = np.asarray(padded_image, dtype=np.float32)
+
+        # Convert PIL-native RGB to BGR
+        image_array = image_array[:, :, ::-1]
+
+        return np.expand_dims(image_array, axis=0)
+
+    def predict(self, image, general_thresh):
+        image = self.prepare_image(image)
+
+        input_name = self.model.get_inputs()[0].name
+        label_name = self.model.get_outputs()[0].name
+        preds = self.model.run([label_name], {input_name: image})[0]
+
+        labels = list(zip(self.tag_names, preds[0].astype(float)))
+
+        # First 4 labels are actually ratings: pick one with argmax
+        ratings_names = [labels[i] for i in self.rating_indexes]
+        ratings_names = dict(ratings_names)
+        ratings_names = sorted(
+            ratings_names.items(),
+            key=lambda x: x[1],
+            reverse=True,
+        )
+
+        # Then we have general tags: pick any where prediction confidence > threshold
+        general_names = [labels[i] for i in self.general_indexes]
+        general_res = [x for x in general_names if x[1] > general_thresh]
+        general_res = dict(general_res)
+
+        ratings = "rating:" + ratings_names[0][0]
+        if ratings_names[0][0] == "general":
+            ratings = "rating:safe"
+        general_res[ratings] = ratings_names[0][1]
+
+        general_res = sorted(
+            general_res.items(),
+            key=lambda x: x[1],
+            reverse=True,
+        )
+        return dict(general_res)
+
+predictor = Predictor()
 
 def genTag(image: PIL.Image.Image, score_threshold: float):
-    …
-    image = np.asarray(image)
-    image = tf.image.resize(image,…
-                            size=(height, width),
-                            method=tf.image.ResizeMethod.AREA,
-                            preserve_aspect_ratio=True)
-    image = image.numpy()
-    image = dd.image.transform_and_pad_image(image, width, height)
-    image = image / 255.
-    probs = model.predict(image[None, ...])[0]
-    probs = probs.astype(float)
-
-    indices = np.argsort(probs)[::-1]
-    result_all = dict()
-    result_threshold = dict()
-    result_html = ''
-    for index in indices:
-        label = labels[index]
-        prob = probs[index]
-        result_all[label] = prob
-        if prob < score_threshold:
-            break
-        result_threshold[label] = prob
-
-    return result_threshold
+    return predictor.predict(image, score_threshold)
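This hunk is the substance of the "change lib" rewrite: genTag drops the old TensorFlow/DeepDanbooru path and delegates to a module-level Predictor that downloads SmilingWolf/wd-eva02-large-tagger-v3 from the Hub and runs it with onnxruntime. A minimal standalone sketch (sample.png is a placeholder; RGBA input mirrors app.py, since prepare_image alpha-composites onto a white canvas):

# Standalone sketch of the new ONNX tagging path.
import PIL.Image
from genTag import genTag

image = PIL.Image.open("sample.png").convert("RGBA")  # placeholder file
result = genTag(image, 0.5)  # {tag: confidence}, plus one "rating:..." entry
for tag, prob in result.items():
    print(f"{tag}: {prob:.3f}")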
ignoreTag.txt
CHANGED
@@ -5,6 +5,8 @@ convenient_censoring,
 bar_censor,
 heart_censor,
 censored,
+character_censor,
+novelty_censor,
 twitter_username,
 patreon_username,
 signature,
@@ -13,20 +15,13 @@ artist_name,
 character_name,
 copyright_name,
 artist_name,
-virtual_youtuber,
-eyebrows_visible_through_hair,
-eyes_visible_through_hair,
-hair_between_eyes,
 web_address,
-…
+qr_code,
+virtual_youtuber,
+loli,
 monochrome,
 letterboxed,
-…
-oekaki,
-holding_hands,
-nail_polish,
-sandwiched,
-symbol-shaped_pupils,
+realistic,
 greyscale,
 sketch,
 speech_bubble,
@@ -36,15 +31,12 @@ spoken_question_mark,
 spoken_sweatdrop,
 spoken_squiggle,
 spoken_object,
-letterboxed,
 spoken_interrobang,
 spoken_exclamation_mark,
 spoken_anger_vein,
 spoken_blush,
 thought_bubble,
-…
-character_censor,
-novelty_censor,
+nail_polish,
 aqua_nails,
 black_nails,
 green_nails,
@@ -59,4 +51,19 @@ toenail_polish,
 toenails,
 yellow_nails,
 blue_nails,
+eyebrows_visible_through_hair,
+eyes_visible_through_hair,
+hair_between_eyes,
+bangs,
+symbol-shaped_pupils,
+toe_scrunch,
+bad_feet,
+oekaki,
+holding_hands,
+sandwiched,
+mole,
+navel,
 interlocked_fingers,
+striped_background,
+striped,
+vertical_stripes,
ignoreTag2.txt
CHANGED
@@ -1,3 +1,4 @@
 rating:safe,
+rating:sensitive,
 rating:questionable,
-rating:explicit,
+rating:explicit,
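checkIgnore.py is not touched by this commit, so the exact semantics of is_ignore(tag, mode) are not visible here. A hypothetical reading consistent with app.py (mode 1 for the single-image view, mode 2 for batch text output, where rating tags are also filtered):

# HYPOTHETICAL sketch of checkIgnore.is_ignore; the real implementation
# is not in this diff and may differ.
def load_tags(path):
    with open(path) as f:
        # each line looks like "tag," -- strip whitespace and trailing comma
        return {line.strip().rstrip(',') for line in f if line.strip()}

IGNORE1 = load_tags('ignoreTag.txt')
IGNORE2 = load_tags('ignoreTag2.txt')  # the rating:* list updated above

def is_ignore(tag, mode):
    if mode == 2:
        return tag in IGNORE1 or tag in IGNORE2
    return tag in IGNORE1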
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
-…
-…
-…
-…
+pillow==10.2.0
+tensorflow==2.15.0.post1
+onnxruntime>=1.12.0
+huggingface-hub
script.js
CHANGED
@@ -1,24 +1,48 @@
+async () => {
 
-document.addEventListener('click', function (event) {
-…
-…
-…
-…
-…
+  document.addEventListener('click', function (event) {
+    let tagItem = event.target.closest('.m5dd_list')
+    let resultArea = event.target.closest('#m5dd_result')
+    if (tagItem) {
+      let labelItem = event.target.closest('span.label')
+      let upItem = event.target.closest('span.up')
+      let downItem = event.target.closest('span.down')
+      let addItem = event.target.closest('span.add')
+      let decItem = event.target.closest('span.dec')
+      let actionItem = event.target.closest('span.action')
+      if (labelItem) {
+        if (tagItem.classList.contains('use')) {
+          tagItem.classList.remove('use')
+        } else {
+          tagItem.classList.add('use')
+        }
+      }
+      if (upItem) { }
+      if (downItem) { }
+      if (addItem) {
+        let label = tagItem.querySelector('span.label').innerText
+        tagItem.querySelector('span.label').innerText = `(${label})`
+      }
+      if (decItem) {
+        let label = tagItem.querySelector('span.label').innerText
+        label = label.replace(/^\s*\(\s*(.+?)\s*\)\s*$/, '$1')
+        tagItem.querySelector('span.label').innerText = label
+      }
+      if (actionItem) {
+        document.getElementById('m5dd_result').innerText =
+          Array.from(document.querySelectorAll('.m5dd_list.use>span.label'))
+            .map(v => v.innerText)
+            .join(', ')
+      }
+    } else if (resultArea) {
+      const selection = window.getSelection()
+      selection.removeAllRanges()
+      const range = document.createRange()
+      range.selectNodeContents(resultArea)
+      selection.addRange(range)
     } else {
+      return
     }
-…
-…
-…
-      .join(', ')
-    } else if (resultArea) {
-      const selection = window.getSelection()
-      selection.removeAllRanges()
-      const range = document.createRange()
-      range.selectNodeContents(resultArea)
-      selection.addRange(range)
-    } else {
-      return
-    }
-})
+  })
+
+}
style.css
CHANGED
@@ -2,22 +2,35 @@
     display: flex;
     cursor: pointer;
     font-size: 1.2em;
-    padding: …
+    padding: .5em;
+    user-select: none;
 }
 
-.m5dd_list>span…
+.m5dd_list>span.label {
     flex: 1;
+    padding: 0 .5em;
 }
 
-.m5dd_list>span…
+.m5dd_list>span.prob {
     color: #aaa;
+    padding: 0 .5em;
 }
 
-.m5dd_list:nth-child(even) {
-    …
-}
+.m5dd_list:nth-child(even) { background: #ECEDF0; }
+.dark .m5dd_list:nth-child(even) { background: #1F2937; }
 
-.m5dd_list:not(.use)>span {
+.m5dd_list:not(.use)>span.label {
     text-decoration: line-through;
     color: #ccc;
 }
+
+.m5dd_image .upload-container .image-frame {
+    height: 20em;
+}
+
+.m5dd_image .upload-container .image-frame {
+    height: 20em;
+}
+.m5dd_image2 .image-container {
+    height: 20em;
+}