Ali Mohsin committed on
Commit
c150284
·
1 Parent(s): b678210

final new ultra fixes

Browse files
Files changed (2) hide show
  1. app.py +21 -10
  2. scripts/prepare_polyvore.py +10 -0
app.py CHANGED
@@ -672,7 +672,11 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
672
  # Global dataset size control
673
  with gr.Row():
674
  gr.Markdown("#### 🎯 **Global Dataset Size Control**")
675
- gr.Markdown("This affects BOTH dataset preparation AND training for ultra-fast testing")
 
 
 
 
676
 
677
  with gr.Row():
678
  global_dataset_size = gr.Dropdown(
@@ -684,7 +688,7 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
684
 
685
  # Apply dataset size button
686
  apply_size_btn = gr.Button("🔄 Apply Dataset Size & Regenerate Splits", variant="primary")
687
- size_status = gr.Textbox(label="Dataset Size Status", value="Dataset size: 2000 samples", interactive=False)
688
 
689
  def apply_dataset_size(size: str):
690
  """Apply global dataset size and regenerate splits."""
@@ -693,20 +697,27 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
693
  return f"✅ Using full dataset ({size}) - no size limit applied"
694
 
695
  # Call the dataset preparation with size limit
696
- from scripts.prepare_polyvore import main as prepare_polyvore
697
  import os
698
 
699
  # Set environment variable for dataset size
700
  os.environ["DATASET_SIZE_LIMIT"] = size
701
 
702
- # Regenerate splits with size limit
703
- result = prepare_polyvore(
704
- data_root="/home/user/app/data/Polyvore",
705
- output_dir="/home/user/app/data/Polyvore/splits",
706
- max_samples=int(size)
707
- )
 
708
 
709
- return f"✅ Successfully regenerated splits with {size} samples limit"
 
 
 
 
 
 
710
  except Exception as e:
711
  return f"❌ Failed to apply dataset size: {str(e)}"
712
 
 
672
  # Global dataset size control
673
  with gr.Row():
674
  gr.Markdown("#### 🎯 **Global Dataset Size Control**")
675
+ gr.Markdown("**Note**: Initial bootstrap downloads full dataset (required). Use 'Apply' button to limit splits for testing.")
676
+
677
+ with gr.Row():
678
+ gr.Markdown("#### 📊 **Current Behavior**")
679
+ gr.Markdown("• **Bootstrap**: Downloads full dataset (53K outfits) - this is necessary\n• **Training**: Uses limited samples based on your selection\n• **Apply Button**: Regenerates splits with your selected size limit")
680
 
681
  with gr.Row():
682
  global_dataset_size = gr.Dropdown(
 
688
 
689
  # Apply dataset size button
690
  apply_size_btn = gr.Button("🔄 Apply Dataset Size & Regenerate Splits", variant="primary")
691
+ size_status = gr.Textbox(label="Dataset Size Status", value="Dataset size: 2000 samples (click Apply to regenerate splits)", interactive=False)
692
 
693
  def apply_dataset_size(size: str):
694
  """Apply global dataset size and regenerate splits."""
 
697
  return f"✅ Using full dataset ({size}) - no size limit applied"
698
 
699
  # Call the dataset preparation with size limit
700
+ import subprocess
701
  import os
702
 
703
  # Set environment variable for dataset size
704
  os.environ["DATASET_SIZE_LIMIT"] = size
705
 
706
+ # Regenerate splits with size limit using subprocess
707
+ cmd = [
708
+ "python", "scripts/prepare_polyvore.py",
709
+ "--root", "/home/user/app/data/Polyvore",
710
+ "--out", "/home/user/app/data/Polyvore/splits",
711
+ "--max_samples", size
712
+ ]
713
 
714
+ result = subprocess.run(cmd, capture_output=True, text=True, check=False)
715
+
716
+ if result.returncode == 0:
717
+ return f"✅ Successfully regenerated splits with {size} samples limit"
718
+ else:
719
+ return f"❌ Failed to regenerate splits: {result.stderr}"
720
+
721
  except Exception as e:
722
  return f"❌ Failed to apply dataset size: {str(e)}"
723
 
scripts/prepare_polyvore.py CHANGED
@@ -358,6 +358,7 @@ def main() -> None:
358
  ap = argparse.ArgumentParser()
359
  ap.add_argument("--root", type=str, required=True, help="Polyvore dataset root")
360
  ap.add_argument("--out", type=str, default=None, help="Output directory for splits (default: <root>/splits)")
 
361
  ap.add_argument("--max_triplets", type=int, default=200000)
362
  ap.add_argument("--neg_per_pos", type=int, default=1)
363
  ap.add_argument("--force_random_split", action="store_true", help="Force random split creation (not recommended)")
@@ -423,6 +424,15 @@ def main() -> None:
423
  print("🔧 Please ensure official splits are available in nondisjoint/ or disjoint/ folders.")
424
  return
425
 
 
 
 
 
 
 
 
 
 
426
  # Generate training data for each split
427
  for split, outfits in splits.items():
428
  if not outfits:
 
358
  ap = argparse.ArgumentParser()
359
  ap.add_argument("--root", type=str, required=True, help="Polyvore dataset root")
360
  ap.add_argument("--out", type=str, default=None, help="Output directory for splits (default: <root>/splits)")
361
+ ap.add_argument("--max_samples", type=int, default=None, help="Maximum number of samples to use (for testing)")
362
  ap.add_argument("--max_triplets", type=int, default=200000)
363
  ap.add_argument("--neg_per_pos", type=int, default=1)
364
  ap.add_argument("--force_random_split", action="store_true", help="Force random split creation (not recommended)")
 
424
  print("🔧 Please ensure official splits are available in nondisjoint/ or disjoint/ folders.")
425
  return
426
 
427
+ # Apply dataset size limit if specified
428
+ if args.max_samples:
429
+ print(f"🎯 Limiting dataset to {args.max_samples} samples for testing...")
430
+ for split in splits:
431
+ if splits[split]:
432
+ # Take only the first max_samples outfits
433
+ splits[split] = splits[split][:args.max_samples]
434
+ print(f" πŸ“Š {split}: Limited to {len(splits[split])} outfits")
435
+
436
  # Generate training data for each split
437
  for split, outfits in splits.items():
438
  if not outfits: