p1atdev committed
Commit dfbfb4f · verified · 1 Parent(s): 3b0a6b8

Update app.py

Files changed (1):
  1. app.py +33 -14
app.py CHANGED
@@ -1,3 +1,18 @@
+try:
+    import flash_attn
+except:
+    import subprocess
+
+    print("Installing flash-attn...")
+    subprocess.run(
+        "pip install flash-attn --no-build-isolation",
+        env={"FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
+        shell=True,
+    )
+    import flash_attn
+
+    print("flash-attn installed.")
+
 import os
 import time
 import spaces
@@ -24,10 +39,6 @@ DEVICE = (
 BAD_WORD_KEYWORDS = ["(medium)"]
 
 
-def fix_compiled_state_dict(state_dict: dict):
-    return {k.replace("._orig_mod.", "."): v for k, v in state_dict.items()}
-
-
 def get_bad_words_ids(tokenizer: PreTrainedTokenizerFast):
     ids = [
         [id]
@@ -38,17 +49,12 @@ def get_bad_words_ids(tokenizer: PreTrainedTokenizerFast):
 
 
 def prepare_models():
-    config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
-    model = AutoModelForPreTraining.from_config(
-        config, torch_dtype=torch.bfloat16, trust_remote_code=True
+    model = AutoModelForPreTraining.from_pretrained(
+        MODEL_NAME, torch_dtype=torch.bfloat16, trust_remote_code=True
     )
     model.decoder_model.use_cache = True
     processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
 
-    state_dict = load_file(MODEL_PATH)
-    state_dict = {k.replace("._orig_mod.", "."): v for k, v in state_dict.items()}
-    model.load_state_dict(state_dict)
-
     model.eval()
     model = model.to(DEVICE)
     # model = torch.compile(model)
@@ -60,11 +66,17 @@ def demo():
     model, processor = prepare_models()
     ban_ids = get_bad_words_ids(processor.decoder_tokenizer)
 
+    translation_mode_map = {
+        "translate": "exact",
+        "translate+extend": "approx",
+    }
+
     @spaces.GPU(duration=5)
     @torch.inference_mode()
     def generate_tags(
         text: str,
         auto_detect: bool,
+        mode: str,
         copyright_tags: str = "",
         length: str = "short",
         max_new_tokens: int = 128,
@@ -77,7 +89,7 @@ def demo():
             "<|bos|>"
             f"<|aspect_ratio:tall|><|rating:general|><|length:{length}|>"
             "<|reserved_2|><|reserved_3|><|reserved_4|>"
-            "<|translate:exact|><|input_end|>"
+            f"<|translate:{translation_mode_map[mode]}|><|input_end|>"
             "<copyright>" + copyright_tags.strip()
         )
         if not auto_detect:
@@ -146,6 +158,11 @@ def demo():
                     ],
                     value="short",
                 )
+                translation_mode = gr.Radio(
+                    label="Translation mode",
+                    choices=list(translation_mode_map.keys()),
+                    value=list(translation_mode_map.keys())[0],
+                )
                 translate_btn = gr.Button(value="Translate", variant="primary")
 
             with gr.Accordion(label="Advanced", open=False):
@@ -174,7 +191,8 @@ def demo():
                     )
 
             with gr.Column():
-                output = gr.Textbox(label="Output", lines=4, interactive=False)
+                output_translation = gr.Textbox(label="Output (translation)", lines=4, interactive=False)
+                output_extension = gr.Textbox(label="Output (extension)", lines=4, interactive=False)
                 time_elapsed = gr.Markdown(value="")
 
         gr.Examples(
@@ -239,6 +257,7 @@ def demo():
             inputs=[
                 text,
                 auto_detect,
+                translation_mode,
                 copyright_tags,
                 length,
                 max_new_tokens,
@@ -247,7 +266,7 @@ def demo():
                 top_k,
                 top_p,
             ],
-            outputs=[output, time_elapsed],
+            outputs=[output_translation, output_extension, time_elapsed],
         )
 
     ui.launch()
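
The first hunk installs flash-attn lazily at import time instead of requiring it at build time. Below is a standalone sketch of the same pattern; the deviations from the committed code (catching ImportError specifically, merging os.environ instead of replacing it, and passing check=True) are assumptions added for illustration, not part of this commit.

    import os
    import subprocess

    try:
        import flash_attn  # noqa: F401  # already installed, nothing to do
    except ImportError:
        print("Installing flash-attn...")
        subprocess.run(
            "pip install flash-attn --no-build-isolation",
            # Skip building the CUDA extension during pip install; this app runs
            # as a ZeroGPU Space (it uses @spaces.GPU), so no GPU is attached
            # while the app is starting up.
            env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
            shell=True,
            check=True,
        )
        import flash_attn  # noqa: F401

        print("flash-attn installed.")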
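
The remaining hunks thread a user-selectable translation mode through the app: a new Radio maps the labels "translate" and "translate+extend" to the prompt token values "exact" and "approx", generate_tags gains a mode argument, and the single Output textbox is split into Output (translation) and Output (extension). A minimal sketch of how the mode reaches the prompt is shown below; build_prompt is a hypothetical helper (app.py assembles this string inline inside generate_tags), and the fixed aspect-ratio and rating values mirror the ones visible in the diff.

    # Hypothetical helper illustrating the new <|translate:...|> control token.
    translation_mode_map = {
        "translate": "exact",          # radio label -> token value
        "translate+extend": "approx",
    }


    def build_prompt(mode: str, length: str = "short", copyright_tags: str = "") -> str:
        return (
            "<|bos|>"
            f"<|aspect_ratio:tall|><|rating:general|><|length:{length}|>"
            "<|reserved_2|><|reserved_3|><|reserved_4|>"
            # Previously hard-coded to "exact"; now driven by the radio choice.
            f"<|translate:{translation_mode_map[mode]}|><|input_end|>"
            "<copyright>" + copyright_tags.strip()
        )


    print(build_prompt("translate+extend"))
    # ...<|translate:approx|><|input_end|><copyright>

Because the click handler now lists three outputs, generate_tags is expected to return a translation string, an extension string, and the elapsed-time markdown, in that order.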