Spaces:
Running
Running
Ali Mohsin
committed on
Commit
·
c150284
1
Parent(s):
b678210
final new ultra fixes
Browse files- app.py +21 -10
- scripts/prepare_polyvore.py +10 -0
app.py
CHANGED
|
@@ -672,7 +672,11 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
|
|
| 672 |
# Global dataset size control
|
| 673 |
with gr.Row():
|
| 674 |
gr.Markdown("#### 🎯 **Global Dataset Size Control**")
|
| 675 |
-
gr.Markdown("
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
|
| 677 |
with gr.Row():
|
| 678 |
global_dataset_size = gr.Dropdown(
|
|
@@ -684,7 +688,7 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
|
|
| 684 |
|
| 685 |
# Apply dataset size button
|
| 686 |
apply_size_btn = gr.Button("π Apply Dataset Size & Regenerate Splits", variant="primary")
|
| 687 |
-
size_status = gr.Textbox(label="Dataset Size Status", value="Dataset size: 2000 samples", interactive=False)
|
| 688 |
|
| 689 |
def apply_dataset_size(size: str):
|
| 690 |
"""Apply global dataset size and regenerate splits."""
|
|
@@ -693,20 +697,27 @@ with gr.Blocks(fill_height=True, title="Dressify - Advanced Outfit Recommendatio
|
|
| 693 |
return f"✅ Using full dataset ({size}) - no size limit applied"
|
| 694 |
|
| 695 |
# Call the dataset preparation with size limit
|
| 696 |
-
|
| 697 |
import os
|
| 698 |
|
| 699 |
# Set environment variable for dataset size
|
| 700 |
os.environ["DATASET_SIZE_LIMIT"] = size
|
| 701 |
|
| 702 |
-
# Regenerate splits with size limit
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
|
|
|
| 708 |
|
| 709 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
except Exception as e:
|
| 711 |
return f"β Failed to apply dataset size: {str(e)}"
|
| 712 |
|
|
|
|
| 672 |
# Global dataset size control
|
| 673 |
with gr.Row():
|
| 674 |
gr.Markdown("#### 🎯 **Global Dataset Size Control**")
|
| 675 |
+
gr.Markdown("**Note**: Initial bootstrap downloads full dataset (required). Use 'Apply' button to limit splits for testing.")
|
| 676 |
+
|
| 677 |
+
with gr.Row():
|
| 678 |
+
gr.Markdown("#### π **Current Behavior**")
|
| 679 |
+
gr.Markdown("• **Bootstrap**: Downloads full dataset (53K outfits) - this is necessary\n• **Training**: Uses limited samples based on your selection\n• **Apply Button**: Regenerates splits with your selected size limit")
|
| 680 |
|
| 681 |
with gr.Row():
|
| 682 |
global_dataset_size = gr.Dropdown(
|
|
|
|
| 688 |
|
| 689 |
# Apply dataset size button
|
| 690 |
apply_size_btn = gr.Button("π Apply Dataset Size & Regenerate Splits", variant="primary")
|
| 691 |
+
size_status = gr.Textbox(label="Dataset Size Status", value="Dataset size: 2000 samples (click Apply to regenerate splits)", interactive=False)
|
| 692 |
|
| 693 |
def apply_dataset_size(size: str):
|
| 694 |
"""Apply global dataset size and regenerate splits."""
|
|
|
|
| 697 |
return f"✅ Using full dataset ({size}) - no size limit applied"
|
| 698 |
|
| 699 |
# Call the dataset preparation with size limit
|
| 700 |
+
import subprocess
|
| 701 |
import os
|
| 702 |
|
| 703 |
# Set environment variable for dataset size
|
| 704 |
os.environ["DATASET_SIZE_LIMIT"] = size
|
| 705 |
|
| 706 |
+
# Regenerate splits with size limit using subprocess
|
| 707 |
+
cmd = [
|
| 708 |
+
"python", "scripts/prepare_polyvore.py",
|
| 709 |
+
"--root", "/home/user/app/data/Polyvore",
|
| 710 |
+
"--out", "/home/user/app/data/Polyvore/splits",
|
| 711 |
+
"--max_samples", size
|
| 712 |
+
]
|
| 713 |
|
| 714 |
+
result = subprocess.run(cmd, capture_output=True, text=True, check=False)
|
| 715 |
+
|
| 716 |
+
if result.returncode == 0:
|
| 717 |
+
return f"✅ Successfully regenerated splits with {size} samples limit"
|
| 718 |
+
else:
|
| 719 |
+
return f"β Failed to regenerate splits: {result.stderr}"
|
| 720 |
+
|
| 721 |
except Exception as e:
|
| 722 |
return f"β Failed to apply dataset size: {str(e)}"
|
| 723 |
|
scripts/prepare_polyvore.py
CHANGED
|
@@ -358,6 +358,7 @@ def main() -> None:
|
|
| 358 |
ap = argparse.ArgumentParser()
|
| 359 |
ap.add_argument("--root", type=str, required=True, help="Polyvore dataset root")
|
| 360 |
ap.add_argument("--out", type=str, default=None, help="Output directory for splits (default: <root>/splits)")
|
|
|
|
| 361 |
ap.add_argument("--max_triplets", type=int, default=200000)
|
| 362 |
ap.add_argument("--neg_per_pos", type=int, default=1)
|
| 363 |
ap.add_argument("--force_random_split", action="store_true", help="Force random split creation (not recommended)")
|
|
@@ -423,6 +424,15 @@ def main() -> None:
|
|
| 423 |
print("🔧 Please ensure official splits are available in nondisjoint/ or disjoint/ folders.")
|
| 424 |
return
|
| 425 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 426 |
# Generate training data for each split
|
| 427 |
for split, outfits in splits.items():
|
| 428 |
if not outfits:
|
|
|
|
| 358 |
ap = argparse.ArgumentParser()
|
| 359 |
ap.add_argument("--root", type=str, required=True, help="Polyvore dataset root")
|
| 360 |
ap.add_argument("--out", type=str, default=None, help="Output directory for splits (default: <root>/splits)")
|
| 361 |
+
ap.add_argument("--max_samples", type=int, default=None, help="Maximum number of samples to use (for testing)")
|
| 362 |
ap.add_argument("--max_triplets", type=int, default=200000)
|
| 363 |
ap.add_argument("--neg_per_pos", type=int, default=1)
|
| 364 |
ap.add_argument("--force_random_split", action="store_true", help="Force random split creation (not recommended)")
|
|
|
|
| 424 |
print("🔧 Please ensure official splits are available in nondisjoint/ or disjoint/ folders.")
|
| 425 |
return
|
| 426 |
|
| 427 |
+
# Apply dataset size limit if specified
|
| 428 |
+
if args.max_samples:
|
| 429 |
+
print(f"🎯 Limiting dataset to {args.max_samples} samples for testing...")
|
| 430 |
+
for split in splits:
|
| 431 |
+
if splits[split]:
|
| 432 |
+
# Take only the first max_samples outfits
|
| 433 |
+
splits[split] = splits[split][:args.max_samples]
|
| 434 |
+
print(f" π {split}: Limited to {len(splits[split])} outfits")
|
| 435 |
+
|
| 436 |
# Generate training data for each split
|
| 437 |
for split, outfits in splits.items():
|
| 438 |
if not outfits:
|