Spaces:

NHLOCAL
/

Kav-Venaki

Running

App Files Community

NHLOCAL committed 30 days ago

Commit

acc19c4

1 Parent(s): 1af19a0

עיצוב מחדש עם תמונות דוגמה נוספות

Browse files

Files changed (6) hide show

app.py +47 -32
sam2.1/configs/sam2.1/sam2.1_hiera_b+.yaml +0 -116
sam2.1/configs/sam2.1/sam2.1_hiera_l.yaml +0 -120
sam2.1/configs/sam2.1/sam2.1_hiera_s.yaml +0 -119
sam2.1/configs/sam2.1/sam2.1_hiera_t.yaml +0 -121
style.css +41 -0

app.py CHANGED Viewed

@@ -37,42 +37,57 @@ def inference(image: Image.Image, gemini_api_key: str):
         error_message += traceback.format_exc()
         return None, gr.update(value=error_message, visible=True)  # החזרת Textbox גלוי עם שגיאה
-title_str = "זיהוי וטשטוש נשים בתמונה"
-description_str = """<p style='text-align: right; direction: rtl'>
-    העלה תמונה, הכנס את מפתח ה־API של Gemini,<br>
-    ולחץ על "הרץ" כדי לזהות ולטשטש נשים בתמונה באופן אוטומטי
-</p>
 """
-# נתיב לתמונת דוגמה
-EXAMPLE_IMAGE = "example_images/example.jpg"
-demo = gr.Interface(
-    fn=inference,
-    inputs=[
-        gr.Image(type="pil", label="בחר תמונה לניתוח או גרור אותה לכאן"),
-        gr.Textbox(
-            label="מפתח API של Gemini",
-            placeholder="הכנס את מפתח ה-API  כאן",
-            type="password"
-        )
-    ],
-    outputs=[
-        gr.Image(type="pil", label="תוצאה סופית"),
-        gr.Textbox(label="שגיאות", visible=False)  # הוספת רכיב להצגת שגיאות
-    ],
     title=title_str,
-    description=description_str,
-    examples=[
-        [EXAMPLE_IMAGE]  # תמונה בלבד, ללא מפתח API
-    ],
-    flagging_mode="never",
-    theme=gr.themes.Default()  # עיצוב קליל לממשק
-)
-if __name__ == "__main__":
-    # ניתן להגדיר share=True אם רוצים לשתף מחוץ לרשת המקומית
-    demo.launch()

         error_message += traceback.format_exc()
         return None, gr.update(value=error_message, visible=True)  # החזרת Textbox גלוי עם שגיאה
+title_str = "🤖 זיהוי וטשטוש נשים בתמונה"
+description_str = """
+<div style='text-align: center; direction: rtl'>
+    <p>
+        ברוכים הבאים לכלי לזיהוי וטשטוש נשים בתמונה!
+        <br>
+        העלו תמונה, הזינו את מפתח ה־API של Gemini,
+        ולחצו על "הרץ" כדי לנתח את התמונה ולטשטש אוטומטית נשים.
+    </p>
+    <p>
+        שימו לב: נדרש מפתח API תקין של Gemini כדי להשתמש בכלי זה.
+        <br>
+        הכלי משתמש בטכנולוגיות מתקדמות כמו YOLO, SAM2 ו-Gemini.
+    </p>
+</div>
 """
+# Paths to the example images offered in the examples gallery below.
+EXAMPLE_IMAGES = ["example_images/example.jpg", "example_images/example2.jpg", "example_images/example3.jpg"]
+# Build the UI with Blocks: inputs, run button and examples in the left column,
+# result image and (initially hidden) error box in the right column.
+with gr.Blocks(
     title=title_str,
+    # Load the stylesheet from disk. In Gradio 5+ the `css` argument is taken as
+    # literal CSS text, so a file name must be passed through `css_paths` instead.
+    css_paths="style.css"
+) as demo:
+    gr.Markdown(f"<h1 style='text-align: center;'>{title_str}</h1>")
+    gr.Markdown(description_str)
+    with gr.Row():
+        with gr.Column():
+            image_input = gr.Image(type="pil", label="🖼️ בחרו תמונה לניתוח")
+            api_key_input = gr.Textbox(
+                label="🔑 מפתח API של Gemini",
+                placeholder="הכניסו את מפתח ה-API  כאן",
+                type="password"
+            )
+            submit_button = gr.Button("🚀 הרץ", variant="primary")
+            # Clicking an example only fills the image input; the API key is never prefilled.
+            gr.Examples(
+                examples=EXAMPLE_IMAGES,
+                inputs=image_input,
+                label="👇 דוגמאות",
+                # cache_examples=True # caching examples speeds up start time, but uses more memory
+            )
+        with gr.Column():
+            image_output = gr.Image(type="pil", label="🖼️ תוצאה לאחר טשטוש")
+            # Hidden by default; on failure `inference` returns an update that makes it visible.
+            error_output = gr.Textbox(label="📜 שגיאות", visible=False, lines=5)
+    # Wire the run button: `inference` returns (result image, error-textbox update).
+    submit_button.click(
+        fn=inference,
+        inputs=[image_input, api_key_input],
+        outputs=[image_output, error_output]
+    )
+if __name__ == "__main__":
+    demo.launch()

sam2.1/configs/sam2.1/sam2.1_hiera_b+.yaml DELETED Viewed

@@ -1,116 +0,0 @@
-# @package _global_
-# Model
-model:
-  _target_: sam2.modeling.sam2_base.SAM2Base
-  image_encoder:
-    _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
-    scalp: 1
-    trunk:
-      _target_: sam2.modeling.backbones.hieradet.Hiera
-      embed_dim: 112
-      num_heads: 2
-    neck:
-      _target_: sam2.modeling.backbones.image_encoder.FpnNeck
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 256
-        normalize: true
-        scale: null
-        temperature: 10000
-      d_model: 256
-      backbone_channel_list: [896, 448, 224, 112]
-      fpn_top_down_levels: [2, 3]  # output level 0 and 1 directly use the backbone features
-      fpn_interp_model: nearest
-  memory_attention:
-    _target_: sam2.modeling.memory_attention.MemoryAttention
-    d_model: 256
-    pos_enc_at_input: true
-    layer:
-      _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
-      activation: relu
-      dim_feedforward: 2048
-      dropout: 0.1
-      pos_enc_at_attn: false
-      self_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-      d_model: 256
-      pos_enc_at_cross_attn_keys: true
-      pos_enc_at_cross_attn_queries: false
-      cross_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        rope_k_repeat: True
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-        kv_in_dim: 64
-    num_layers: 4
-  memory_encoder:
-      _target_: sam2.modeling.memory_encoder.MemoryEncoder
-      out_dim: 64
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 64
-        normalize: true
-        scale: null
-        temperature: 10000
-      mask_downsampler:
-        _target_: sam2.modeling.memory_encoder.MaskDownSampler
-        kernel_size: 3
-        stride: 2
-        padding: 1
-      fuser:
-        _target_: sam2.modeling.memory_encoder.Fuser
-        layer:
-          _target_: sam2.modeling.memory_encoder.CXBlock
-          dim: 256
-          kernel_size: 7
-          padding: 3
-          layer_scale_init_value: 1e-6
-          use_dwconv: True  # depth-wise convs
-        num_layers: 2
-  num_maskmem: 7
-  image_size: 1024
-  # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
-  sigmoid_scale_for_mem_enc: 20.0
-  sigmoid_bias_for_mem_enc: -10.0
-  use_mask_input_as_output_without_sam: true
-  # Memory
-  directly_add_no_mem_embed: true
-  no_obj_embed_spatial: true
-  # use high-resolution feature map in the SAM mask decoder
-  use_high_res_features_in_sam: true
-  # output 3 masks on the first click on initial conditioning frames
-  multimask_output_in_sam: true
-  # SAM heads
-  iou_prediction_use_sigmoid: True
-  # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
-  use_obj_ptrs_in_encoder: true
-  add_tpos_enc_to_obj_ptrs: true
-  proj_tpos_enc_in_obj_ptrs: true
-  use_signed_tpos_enc_to_obj_ptrs: true
-  only_obj_ptrs_in_the_past_for_eval: true
-  # object occlusion prediction
-  pred_obj_scores: true
-  pred_obj_scores_mlp: true
-  fixed_no_obj_ptr: true
-  # multimask tracking settings
-  multimask_output_for_tracking: true
-  use_multimask_token_for_obj_ptr: true
-  multimask_min_pt_num: 0
-  multimask_max_pt_num: 1
-  use_mlp_for_obj_ptr_proj: true
-  # Compilation flag
-  compile_image_encoder: False

sam2.1/configs/sam2.1/sam2.1_hiera_l.yaml DELETED Viewed

@@ -1,120 +0,0 @@
-# @package _global_
-# Model
-model:
-  _target_: sam2.modeling.sam2_base.SAM2Base
-  image_encoder:
-    _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
-    scalp: 1
-    trunk:
-      _target_: sam2.modeling.backbones.hieradet.Hiera
-      embed_dim: 144
-      num_heads: 2
-      stages: [2, 6, 36, 4]
-      global_att_blocks: [23, 33, 43]
-      window_pos_embed_bkg_spatial_size: [7, 7]
-      window_spec: [8, 4, 16, 8]
-    neck:
-      _target_: sam2.modeling.backbones.image_encoder.FpnNeck
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 256
-        normalize: true
-        scale: null
-        temperature: 10000
-      d_model: 256
-      backbone_channel_list: [1152, 576, 288, 144]
-      fpn_top_down_levels: [2, 3]  # output level 0 and 1 directly use the backbone features
-      fpn_interp_model: nearest
-  memory_attention:
-    _target_: sam2.modeling.memory_attention.MemoryAttention
-    d_model: 256
-    pos_enc_at_input: true
-    layer:
-      _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
-      activation: relu
-      dim_feedforward: 2048
-      dropout: 0.1
-      pos_enc_at_attn: false
-      self_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-      d_model: 256
-      pos_enc_at_cross_attn_keys: true
-      pos_enc_at_cross_attn_queries: false
-      cross_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        rope_k_repeat: True
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-        kv_in_dim: 64
-    num_layers: 4
-  memory_encoder:
-      _target_: sam2.modeling.memory_encoder.MemoryEncoder
-      out_dim: 64
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 64
-        normalize: true
-        scale: null
-        temperature: 10000
-      mask_downsampler:
-        _target_: sam2.modeling.memory_encoder.MaskDownSampler
-        kernel_size: 3
-        stride: 2
-        padding: 1
-      fuser:
-        _target_: sam2.modeling.memory_encoder.Fuser
-        layer:
-          _target_: sam2.modeling.memory_encoder.CXBlock
-          dim: 256
-          kernel_size: 7
-          padding: 3
-          layer_scale_init_value: 1e-6
-          use_dwconv: True  # depth-wise convs
-        num_layers: 2
-  num_maskmem: 7
-  image_size: 1024
-  # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
-  sigmoid_scale_for_mem_enc: 20.0
-  sigmoid_bias_for_mem_enc: -10.0
-  use_mask_input_as_output_without_sam: true
-  # Memory
-  directly_add_no_mem_embed: true
-  no_obj_embed_spatial: true
-  # use high-resolution feature map in the SAM mask decoder
-  use_high_res_features_in_sam: true
-  # output 3 masks on the first click on initial conditioning frames
-  multimask_output_in_sam: true
-  # SAM heads
-  iou_prediction_use_sigmoid: True
-  # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
-  use_obj_ptrs_in_encoder: true
-  add_tpos_enc_to_obj_ptrs: true
-  proj_tpos_enc_in_obj_ptrs: true
-  use_signed_tpos_enc_to_obj_ptrs: true
-  only_obj_ptrs_in_the_past_for_eval: true
-  # object occlusion prediction
-  pred_obj_scores: true
-  pred_obj_scores_mlp: true
-  fixed_no_obj_ptr: true
-  # multimask tracking settings
-  multimask_output_for_tracking: true
-  use_multimask_token_for_obj_ptr: true
-  multimask_min_pt_num: 0
-  multimask_max_pt_num: 1
-  use_mlp_for_obj_ptr_proj: true
-  # Compilation flag
-  compile_image_encoder: False

sam2.1/configs/sam2.1/sam2.1_hiera_s.yaml DELETED Viewed

@@ -1,119 +0,0 @@
-# @package _global_
-# Model
-model:
-  _target_: sam2.modeling.sam2_base.SAM2Base
-  image_encoder:
-    _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
-    scalp: 1
-    trunk:
-      _target_: sam2.modeling.backbones.hieradet.Hiera
-      embed_dim: 96
-      num_heads: 1
-      stages: [1, 2, 11, 2]
-      global_att_blocks: [7, 10, 13]
-      window_pos_embed_bkg_spatial_size: [7, 7]
-    neck:
-      _target_: sam2.modeling.backbones.image_encoder.FpnNeck
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 256
-        normalize: true
-        scale: null
-        temperature: 10000
-      d_model: 256
-      backbone_channel_list: [768, 384, 192, 96]
-      fpn_top_down_levels: [2, 3]  # output level 0 and 1 directly use the backbone features
-      fpn_interp_model: nearest
-  memory_attention:
-    _target_: sam2.modeling.memory_attention.MemoryAttention
-    d_model: 256
-    pos_enc_at_input: true
-    layer:
-      _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
-      activation: relu
-      dim_feedforward: 2048
-      dropout: 0.1
-      pos_enc_at_attn: false
-      self_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-      d_model: 256
-      pos_enc_at_cross_attn_keys: true
-      pos_enc_at_cross_attn_queries: false
-      cross_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        rope_k_repeat: True
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-        kv_in_dim: 64
-    num_layers: 4
-  memory_encoder:
-      _target_: sam2.modeling.memory_encoder.MemoryEncoder
-      out_dim: 64
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 64
-        normalize: true
-        scale: null
-        temperature: 10000
-      mask_downsampler:
-        _target_: sam2.modeling.memory_encoder.MaskDownSampler
-        kernel_size: 3
-        stride: 2
-        padding: 1
-      fuser:
-        _target_: sam2.modeling.memory_encoder.Fuser
-        layer:
-          _target_: sam2.modeling.memory_encoder.CXBlock
-          dim: 256
-          kernel_size: 7
-          padding: 3
-          layer_scale_init_value: 1e-6
-          use_dwconv: True  # depth-wise convs
-        num_layers: 2
-  num_maskmem: 7
-  image_size: 1024
-  # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
-  sigmoid_scale_for_mem_enc: 20.0
-  sigmoid_bias_for_mem_enc: -10.0
-  use_mask_input_as_output_without_sam: true
-  # Memory
-  directly_add_no_mem_embed: true
-  no_obj_embed_spatial: true
-  # use high-resolution feature map in the SAM mask decoder
-  use_high_res_features_in_sam: true
-  # output 3 masks on the first click on initial conditioning frames
-  multimask_output_in_sam: true
-  # SAM heads
-  iou_prediction_use_sigmoid: True
-  # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
-  use_obj_ptrs_in_encoder: true
-  add_tpos_enc_to_obj_ptrs: true
-  proj_tpos_enc_in_obj_ptrs: true
-  use_signed_tpos_enc_to_obj_ptrs: true
-  only_obj_ptrs_in_the_past_for_eval: true
-  # object occlusion prediction
-  pred_obj_scores: true
-  pred_obj_scores_mlp: true
-  fixed_no_obj_ptr: true
-  # multimask tracking settings
-  multimask_output_for_tracking: true
-  use_multimask_token_for_obj_ptr: true
-  multimask_min_pt_num: 0
-  multimask_max_pt_num: 1
-  use_mlp_for_obj_ptr_proj: true
-  # Compilation flag
-  compile_image_encoder: False

sam2.1/configs/sam2.1/sam2.1_hiera_t.yaml DELETED Viewed

@@ -1,121 +0,0 @@
-# @package _global_
-# Model
-model:
-  _target_: sam2.modeling.sam2_base.SAM2Base
-  image_encoder:
-    _target_: sam2.modeling.backbones.image_encoder.ImageEncoder
-    scalp: 1
-    trunk:
-      _target_: sam2.modeling.backbones.hieradet.Hiera
-      embed_dim: 96
-      num_heads: 1
-      stages: [1, 2, 7, 2]
-      global_att_blocks: [5, 7, 9]
-      window_pos_embed_bkg_spatial_size: [7, 7]
-    neck:
-      _target_: sam2.modeling.backbones.image_encoder.FpnNeck
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 256
-        normalize: true
-        scale: null
-        temperature: 10000
-      d_model: 256
-      backbone_channel_list: [768, 384, 192, 96]
-      fpn_top_down_levels: [2, 3]  # output level 0 and 1 directly use the backbone features
-      fpn_interp_model: nearest
-  memory_attention:
-    _target_: sam2.modeling.memory_attention.MemoryAttention
-    d_model: 256
-    pos_enc_at_input: true
-    layer:
-      _target_: sam2.modeling.memory_attention.MemoryAttentionLayer
-      activation: relu
-      dim_feedforward: 2048
-      dropout: 0.1
-      pos_enc_at_attn: false
-      self_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-      d_model: 256
-      pos_enc_at_cross_attn_keys: true
-      pos_enc_at_cross_attn_queries: false
-      cross_attention:
-        _target_: sam2.modeling.sam.transformer.RoPEAttention
-        rope_theta: 10000.0
-        feat_sizes: [64, 64]
-        rope_k_repeat: True
-        embedding_dim: 256
-        num_heads: 1
-        downsample_rate: 1
-        dropout: 0.1
-        kv_in_dim: 64
-    num_layers: 4
-  memory_encoder:
-      _target_: sam2.modeling.memory_encoder.MemoryEncoder
-      out_dim: 64
-      position_encoding:
-        _target_: sam2.modeling.position_encoding.PositionEmbeddingSine
-        num_pos_feats: 64
-        normalize: true
-        scale: null
-        temperature: 10000
-      mask_downsampler:
-        _target_: sam2.modeling.memory_encoder.MaskDownSampler
-        kernel_size: 3
-        stride: 2
-        padding: 1
-      fuser:
-        _target_: sam2.modeling.memory_encoder.Fuser
-        layer:
-          _target_: sam2.modeling.memory_encoder.CXBlock
-          dim: 256
-          kernel_size: 7
-          padding: 3
-          layer_scale_init_value: 1e-6
-          use_dwconv: True  # depth-wise convs
-        num_layers: 2
-  num_maskmem: 7
-  image_size: 1024
-  # apply scaled sigmoid on mask logits for memory encoder, and directly feed input mask as output mask
-  # SAM decoder
-  sigmoid_scale_for_mem_enc: 20.0
-  sigmoid_bias_for_mem_enc: -10.0
-  use_mask_input_as_output_without_sam: true
-  # Memory
-  directly_add_no_mem_embed: true
-  no_obj_embed_spatial: true
-  # use high-resolution feature map in the SAM mask decoder
-  use_high_res_features_in_sam: true
-  # output 3 masks on the first click on initial conditioning frames
-  multimask_output_in_sam: true
-  # SAM heads
-  iou_prediction_use_sigmoid: True
-  # cross-attend to object pointers from other frames (based on SAM output tokens) in the encoder
-  use_obj_ptrs_in_encoder: true
-  add_tpos_enc_to_obj_ptrs: true
-  proj_tpos_enc_in_obj_ptrs: true
-  use_signed_tpos_enc_to_obj_ptrs: true
-  only_obj_ptrs_in_the_past_for_eval: true
-  # object occlusion prediction
-  pred_obj_scores: true
-  pred_obj_scores_mlp: true
-  fixed_no_obj_ptr: true
-  # multimask tracking settings
-  multimask_output_for_tracking: true
-  use_multimask_token_for_obj_ptr: true
-  multimask_min_pt_num: 0
-  multimask_max_pt_num: 1
-  use_mlp_for_obj_ptr_proj: true
-  # Compilation flag
-  # HieraT does not currently support compilation, should always be set to False
-  compile_image_encoder: False

style.css ADDED Viewed

@@ -0,0 +1,41 @@

+/* שינוי צבע כפתור "הרץ" */
+.primary {
+    background-color: #4CAF50 !important; /* גוון ירוק */
+    color: white !important;
+    border-color: #4CAF50 !important;
+}
+/* עיצוב תיבות קלט ופלט */
+.gr-image, .gr-textbox {
+    border: 2px solid #ccc;
+    border-radius: 8px;
+    padding: 10px;
+}
+/* עיצוב כותרות */
+.gr-image label, .gr-textbox label {
+    font-weight: bold;
+    color: #333;
+}
+/* עיצוב גלריית הדוגמאות */
+.sample-container {
+    border: 1px solid #ccc;
+    border-radius: 8px;
+    margin-bottom: 10px; /* מרווח בין גלריית הדוגמאות לכפתור */
+}
+.sample-container > .prose > h4 {
+    margin-bottom: 0px; /* ביטול מרווח מתחת לכותרת של גלריית הדוגמאות */
+}
+.gr-sample img {
+    border: 1px solid #ccc;
+    border-radius: 8px;
+    object-fit: cover; /* חיתוך תמונה לגודל קבוע */
+}
+.gr-sample img:hover {
+    border-color: #4CAF50;
+    cursor: pointer;
+}