Spaces:

Stylique
/

recomendation

Running

App Files Community

Ali Mohsin committed on Sep 8

Commit

25bdf34

1 Parent(s): 2c856cd

final prod

Browse files

Files changed (8) hide show

.gitignore +765 -0
app.py +26 -22
inference.py +94 -14
train_resnet.py +57 -8
train_vit_triplet.py +61 -11
utils/advanced_metrics.py +287 -0
utils/hf_utils.py +82 -0
utils/triplet_mining.py +1 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,765 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be added to the global gitignore or merged into this project gitignore.  For a PyCharm
+#  project, it is recommended to include the following files:
+#  .idea/
+#  *.iml
+#  *.ipr
+#  *.iws
+.idea/
+*.iml
+*.ipr
+*.iws
+# VS Code
+.vscode/
+*.code-workspace
+# Sublime Text
+*.sublime-project
+*.sublime-workspace
+# Vim
+*.swp
+*.swo
+*~
+# Emacs
+*~
+\#*\#
+/.emacs.desktop
+/.emacs.desktop.lock
+*.elc
+auto-save-list
+tramp
+.\#*
+# macOS
+.DS_Store
+.AppleDouble
+.LSOverride
+Icon
+._*
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+# Windows
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+*.tmp
+*.temp
+*.bak
+*.swp
+*~.nib
+local.properties
+.settings/
+.loadpath
+.recommenders
+.target/
+.metadata
+.factorypath
+.buildpath
+.project
+.classpath
+*.launch
+.pydevproject
+.cproject
+.autotools
+.factorypath
+.buildpath
+.target
+.tern-project
+.idea/
+*.iml
+*.ipr
+*.iws
+.settings/
+.loadpath
+.recommenders
+.target/
+.metadata
+.factorypath
+.buildpath
+.project
+.classpath
+*.launch
+.pydevproject
+.cproject
+.autotools
+.factorypath
+.buildpath
+.target
+.tern-project
+.idea/
+*.iml
+*.ipr
+*.iws
+.settings/
+.loadpath
+.recommenders
+.target/
+.metadata
+.factorypath
+.buildpath
+.project
+.classpath
+*.launch
+.pydevproject
+.cproject
+.autotools
+.factorypath
+.buildpath
+.target
+.tern-project
+# Linux
+*~
+.fuse_hidden*
+.directory
+.Trash-*
+.nfs*
+# Machine Learning / Deep Learning specific
+# Model checkpoints and weights
+*.pth
+*.pt
+*.ckpt
+*.h5
+*.hdf5
+*.pb
+*.pkl
+*.pickle
+*.joblib
+*.model
+*.weights
+*.bin
+*.safetensors
+# Training logs and outputs
+logs/
+runs/
+wandb/
+tensorboard/
+lightning_logs/
+mlruns/
+outputs/
+checkpoints/
+models/
+experiments/
+results/
+artifacts/
+# Data files (large datasets)
+data/
+datasets/
+*.csv
+*.tsv
+*.json
+*.jsonl
+*.parquet
+*.feather
+*.arrow
+*.h5
+*.hdf5
+*.npz
+*.npy
+*.mat
+*.pkl
+*.pickle
+# Image files (if not needed in repo)
+*.jpg
+*.jpeg
+*.png
+*.gif
+*.bmp
+*.tiff
+*.tif
+*.webp
+*.svg
+*.ico
+# Video files
+*.mp4
+*.avi
+*.mov
+*.wmv
+*.flv
+*.webm
+*.mkv
+# Audio files
+*.mp3
+*.wav
+*.flac
+*.aac
+*.ogg
+*.wma
+# Archive files
+*.zip
+*.tar
+*.tar.gz
+*.tar.bz2
+*.tar.xz
+*.rar
+*.7z
+*.gz
+*.bz2
+*.xz
+# Hugging Face specific
+.cache/
+huggingface/
+transformers_cache/
+datasets_cache/
+# Jupyter notebook checkpoints
+.ipynb_checkpoints/
+# Temporary files
+tmp/
+temp/
+.tmp/
+.temp/
+# Configuration files with secrets
+.env
+.env.local
+.env.production
+.env.staging
+config.ini
+secrets.json
+credentials.json
+*.key
+*.pem
+*.crt
+*.p12
+*.pfx
+# IDE and editor files
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.project
+.pydevproject
+.settings/
+# OS generated files
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+# Project specific
+# Exclude large model files and datasets
+models/exports/
+data/Polyvore/
+*.pth
+*.pt
+*.ckpt
+# Exclude generated files
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+# Exclude virtual environments
+venv/
+env/
+ENV/
+.venv/
+.env/
+# Exclude test outputs
+.pytest_cache/
+.coverage
+htmlcov/
+.tox/
+.nox/
+# Exclude documentation builds
+docs/_build/
+site/
+# Exclude temporary files
+*.tmp
+*.temp
+*.bak
+*.swp
+*~
+# Exclude logs
+*.log
+logs/
+# Exclude cache directories
+.cache/
+.pytest_cache/
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Exclude coverage reports
+.coverage
+.coverage.*
+coverage.xml
+*.cover
+.hypothesis/
+# Exclude profiling data
+.prof
+# Exclude Jupyter notebook checkpoints
+.ipynb_checkpoints/
+# Exclude IPython
+profile_default/
+ipython_config.py
+# Exclude pyenv
+.python-version
+# Exclude pipenv
+Pipfile.lock
+# Exclude poetry
+poetry.lock
+# Exclude pdm
+pdm.lock
+.pdm.toml
+# Exclude PEP 582
+__pypackages__/
+# Exclude Celery
+celerybeat-schedule
+celerybeat.pid
+# Exclude SageMath
+*.sage.py
+# Exclude Spyder
+.spyderproject
+.spyproject
+# Exclude Rope
+.ropeproject
+# Exclude mkdocs
+/site
+# Exclude mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Exclude Pyre
+.pyre/
+# Exclude pytype
+.pytype/
+# Exclude Cython
+cython_debug/
+# Exclude PyCharm
+.idea/
+*.iml
+*.ipr
+*.iws
+# Exclude VS Code
+.vscode/
+*.code-workspace
+# Exclude Sublime Text
+*.sublime-project
+*.sublime-workspace
+# Exclude Vim
+*.swp
+*.swo
+*~
+# Exclude Emacs
+*~
+\#*\#
+/.emacs.desktop
+/.emacs.desktop.lock
+*.elc
+auto-save-list
+tramp
+.\#*
+# Exclude macOS
+.DS_Store
+.AppleDouble
+.LSOverride
+Icon
+._*
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+# Exclude Windows
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+*.tmp
+*.temp
+*.bak
+*.swp
+*~.nib
+local.properties
+.settings/
+.loadpath
+.recommenders
+.target/
+.metadata
+.factorypath
+.buildpath
+.project
+.classpath
+*.launch
+.pydevproject
+.cproject
+.autotools
+.factorypath
+.buildpath
+.target
+.tern-project
+.idea/
+*.iml
+*.ipr
+*.iws
+.settings/
+.loadpath
+.recommenders
+.target/
+.metadata
+.factorypath
+.buildpath
+.project
+.classpath
+*.launch
+.pydevproject
+.cproject
+.autotools
+.factorypath
+.buildpath
+.target
+.tern-project
+# Exclude Linux
+*~
+.fuse_hidden*
+.directory
+.Trash-*
+.nfs*
+# Exclude Machine Learning files
+*.pth
+*.pt
+*.ckpt
+*.h5
+*.hdf5
+*.pb
+*.pkl
+*.pickle
+*.joblib
+*.model
+*.weights
+*.bin
+*.safetensors
+# Exclude training outputs
+logs/
+runs/
+wandb/
+tensorboard/
+lightning_logs/
+mlruns/
+outputs/
+checkpoints/
+models/
+experiments/
+results/
+artifacts/
+# Exclude data files
+data/
+datasets/
+*.csv
+*.tsv
+*.json
+*.jsonl
+*.parquet
+*.feather
+*.arrow
+*.h5
+*.hdf5
+*.npz
+*.npy
+*.mat
+*.pkl
+*.pickle
+# Exclude media files
+*.jpg
+*.jpeg
+*.png
+*.gif
+*.bmp
+*.tiff
+*.tif
+*.webp
+*.svg
+*.ico
+*.mp4
+*.avi
+*.mov
+*.wmv
+*.flv
+*.webm
+*.mkv
+*.mp3
+*.wav
+*.flac
+*.aac
+*.ogg
+*.wma
+# Exclude archives
+*.zip
+*.tar
+*.tar.gz
+*.tar.bz2
+*.tar.xz
+*.rar
+*.7z
+*.gz
+*.bz2
+*.xz
+# Exclude Hugging Face cache
+.cache/
+huggingface/
+transformers_cache/
+datasets_cache/
+# Exclude temporary files
+tmp/
+temp/
+.tmp/
+.temp/
+# Exclude secrets
+.env
+.env.local
+.env.production
+.env.staging
+config.ini
+secrets.json
+credentials.json
+*.key
+*.pem
+*.crt
+*.p12
+*.pfx

app.py CHANGED Viewed

@@ -152,9 +152,9 @@ def push_splits_to_hf(token, username):
         return "❌ Please provide HF token and username"
     try:
-        from utils.hf_hub_integration import create_hf_integration
-        hf = create_hf_integration(token)
-        result = hf.upload_splits_to_hf()
         if result.get("success"):
             return f"✅ Successfully uploaded splits to {username}/Dressify-Helper"
@@ -169,9 +169,9 @@ def push_models_to_hf(token, username):
         return "❌ Please provide HF token and username"
     try:
-        from utils.hf_hub_integration import create_hf_integration
-        hf = create_hf_integration(token)
-        result = hf.upload_models_to_hf()
         if result.get("success"):
             return f"✅ Successfully uploaded models to {username}/dressify-models"
@@ -186,9 +186,9 @@ def push_everything_to_hf(token, username):
         return "❌ Please provide HF token and username"
     try:
-        from utils.hf_hub_integration import create_hf_integration
-        hf = create_hf_integration(token)
-        result = hf.upload_everything_to_hf()
         if result.get("success"):
             return f"✅ Successfully uploaded everything to HF Hub"
@@ -271,13 +271,15 @@ def _background_bootstrap():
         if not os.path.exists(resnet_ckpt):
             BOOT_STATUS = "training-resnet"
             subprocess.run([
-                "python", "train_resnet.py", "--data_root", ds_root, "--epochs", "3",
                 "--out", os.path.join(export_dir, "resnet_item_embedder.pth")
             ], check=False)
         if not os.path.exists(vit_ckpt):
             BOOT_STATUS = "training-vit"
             subprocess.run([
-                "python", "train_vit_triplet.py", "--data_root", ds_root, "--epochs", "3",
                 "--export", os.path.join(export_dir, "vit_outfit_model.pth")
             ], check=False)
         service.reload_models()
@@ -600,9 +602,9 @@ def start_training_advanced(
                 if hf_token:
                     log_message += "📤 Auto-uploading artifacts to Hugging Face Hub...\n"
                     try:
-                        from utils.hf_hub_integration import create_hf_integration
-                        hf = create_hf_integration(hf_token)
-                        result = hf.upload_everything_to_hf()
                         if result.get("success"):
                             log_message += "✅ Successfully uploaded to HF Hub!\n"
                             log_message += "🔗 Models: https://huggingface.co/Stylique/dressify-models\n"
@@ -647,9 +649,10 @@ def start_training_simple(dataset_size: str, res_epochs: int, vit_epochs: int):
             # Train ResNet first and wait for completion
             log_message += f"\n🚀 Starting ResNet training on {dataset_size} samples...\n"
-            resnet_result =             subprocess.run([
-                "python", "train_resnet.py", "--data_root", DATASET_ROOT, "--epochs", str(res_epochs),
-                "--batch_size", "8", "--out", os.path.join(export_dir, "resnet_item_embedder.pth")
             ] + dataset_args, capture_output=True, text=True, check=False)
             if resnet_result.returncode == 0:
@@ -674,8 +677,9 @@ def start_training_simple(dataset_size: str, res_epochs: int, vit_epochs: int):
             log_message += f"\n🚀 Starting ViT training on {dataset_size} samples...\n"
             vit_result = subprocess.run([
-                "python", "train_vit_triplet.py", "--data_root", DATASET_ROOT, "--epochs", str(vit_epochs),
-                "--batch_size", "8", "--export", os.path.join(export_dir, "vit_outfit_model.pth")
             ] + dataset_args, capture_output=True, text=True, check=False)
             if vit_result.returncode == 0:
@@ -692,9 +696,9 @@ def start_training_simple(dataset_size: str, res_epochs: int, vit_epochs: int):
             if hf_token:
                 log_message += "\n📤 Auto-uploading artifacts to Hugging Face Hub...\n"
                 try:
-                    from utils.hf_hub_integration import create_hf_integration
-                    hf = create_hf_integration(hf_token)
-                    result = hf.upload_everything_to_hf()
                     if result.get("success"):
                         log_message += "✅ Successfully uploaded to HF Hub!\n"
                         log_message += "🔗 Models: https://huggingface.co/Stylique/dressify-models\n"

         return "❌ Please provide HF token and username"
     try:
+        from utils.hf_utils import HFModelManager
+        hf = HFModelManager(token=token, username=username)
+        result = hf.upload_model("splits", "Dressify-Helper")
         if result.get("success"):
             return f"✅ Successfully uploaded splits to {username}/Dressify-Helper"
         return "❌ Please provide HF token and username"
     try:
+        from utils.hf_utils import HFModelManager
+        hf = HFModelManager(token=token, username=username)
+        result = hf.upload_model("models", "dressify-models")
         if result.get("success"):
             return f"✅ Successfully uploaded models to {username}/dressify-models"
         return "❌ Please provide HF token and username"
     try:
+        from utils.hf_utils import HFModelManager
+        hf = HFModelManager(token=token, username=username)
+        result = hf.upload_model("everything", "dressify-complete")
         if result.get("success"):
             return f"✅ Successfully uploaded everything to HF Hub"
         if not os.path.exists(resnet_ckpt):
             BOOT_STATUS = "training-resnet"
             subprocess.run([
+                "python", "train_resnet.py", "--data_root", ds_root, "--epochs", "50",
+                "--batch_size", "16", "--lr", "1e-3", "--early_stopping_patience", "10",
                 "--out", os.path.join(export_dir, "resnet_item_embedder.pth")
             ], check=False)
         if not os.path.exists(vit_ckpt):
             BOOT_STATUS = "training-vit"
             subprocess.run([
+                "python", "train_vit_triplet.py", "--data_root", ds_root, "--epochs", "50",
+                "--batch_size", "16", "--lr", "5e-4", "--early_stopping_patience", "10",
                 "--export", os.path.join(export_dir, "vit_outfit_model.pth")
             ], check=False)
         service.reload_models()
                 if hf_token:
                     log_message += "📤 Auto-uploading artifacts to Hugging Face Hub...\n"
                     try:
+                        from utils.hf_utils import HFModelManager
+                        hf = HFModelManager(token=hf_token, username="Stylique")
+                        result = hf.upload_model("everything", "dressify-complete")
                         if result.get("success"):
                             log_message += "✅ Successfully uploaded to HF Hub!\n"
                             log_message += "🔗 Models: https://huggingface.co/Stylique/dressify-models\n"
             # Train ResNet first and wait for completion
             log_message += f"\n🚀 Starting ResNet training on {dataset_size} samples...\n"
+            resnet_result = subprocess.run([
+                "python", "train_resnet.py", "--data_root", DATASET_ROOT, "--epochs", "50",
+                "--batch_size", "16", "--lr", "1e-3", "--early_stopping_patience", "10",
+                "--out", os.path.join(export_dir, "resnet_item_embedder.pth")
             ] + dataset_args, capture_output=True, text=True, check=False)
             if resnet_result.returncode == 0:
             log_message += f"\n🚀 Starting ViT training on {dataset_size} samples...\n"
             vit_result = subprocess.run([
+                "python", "train_vit_triplet.py", "--data_root", DATASET_ROOT, "--epochs", "50",
+                "--batch_size", "16", "--lr", "5e-4", "--early_stopping_patience", "10",
+                "--export", os.path.join(export_dir, "vit_outfit_model.pth")
             ] + dataset_args, capture_output=True, text=True, check=False)
             if vit_result.returncode == 0:
             if hf_token:
                 log_message += "\n📤 Auto-uploading artifacts to Hugging Face Hub...\n"
                 try:
+                    from utils.hf_utils import HFModelManager
+                    hf = HFModelManager(token=hf_token, username="Stylique")
+                    result = hf.upload_model("everything", "dressify-complete")
                     if result.get("success"):
                         log_message += "✅ Successfully uploaded to HF Hub!\n"
                         log_message += "🔗 Models: https://huggingface.co/Stylique/dressify-models\n"

inference.py CHANGED Viewed

@@ -5,6 +5,7 @@ import numpy as np
 import torch
 import torch.nn as nn
 from PIL import Image
 from utils.transforms import build_inference_transform
 from models.resnet_embedder import ResNetItemEmbedder
@@ -40,7 +41,27 @@ class InferenceService:
         model = ResNetItemEmbedder(embedding_dim=self.embed_dim)
         if strategy == "random":
             return model
-        # prefer best if present
         best_path = os.path.join(os.path.dirname(ckpt_path), "resnet_item_embedder_best.pth")
         if os.path.exists(best_path):
             ckpt_to_use = best_path
@@ -48,11 +69,9 @@ class InferenceService:
             ckpt_to_use = ckpt_path
         if os.path.exists(ckpt_to_use):
             state = torch.load(ckpt_to_use, map_location="cpu")
-            # accept either full state_dict or {"state_dict": ...}
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
-            missing, unexpected = model.load_state_dict(state_dict, strict=False)
-            if len(unexpected) == 0:
-                return model
         return model
     def _load_vit(self) -> nn.Module:
@@ -61,6 +80,27 @@ class InferenceService:
         model = OutfitCompatibilityModel(embedding_dim=self.embed_dim)
         if strategy == "random":
             return model
         best_path = os.path.join(os.path.dirname(ckpt_path), "vit_outfit_model_best.pth")
         ckpt_to_use = best_path if os.path.exists(best_path) else ckpt_path
         if os.path.exists(ckpt_to_use):
@@ -118,32 +158,72 @@ class InferenceService:
         min_size, max_size = 4, 6
         ids = list(range(len(proc_items)))
-        # Slot-aware pools from categories (best-effort)
         def cat_str(i: int) -> str:
             return (proc_items[i].get("category") or "").lower()
-        uppers = [i for i in ids if any(k in cat_str(i) for k in ["top", "shirt", "tshirt", "blouse", "jacket", "hoodie"])]
-        bottoms = [i for i in ids if any(k in cat_str(i) for k in ["pant", "trouser", "jean", "skirt", "short"])]
-        shoes = [i for i in ids if any(k in cat_str(i) for k in ["shoe", "sneaker", "boot", "heel"])]
-        accs = [i for i in ids if any(k in cat_str(i) for k in ["watch", "belt", "ring", "bracelet", "accessor", "bag", "hat"])]
         candidates: List[List[int]] = []
         num_samples = max(num_outfits * 12, 24)
         for _ in range(num_samples):
             if uppers and bottoms and shoes and accs:
                 subset = [
                     int(rng.choice(uppers)),
                     int(rng.choice(bottoms)),
                     int(rng.choice(shoes)),
                     int(rng.choice(accs)),
                 ]
-                # Optional: add one more random distinct item
                 remain = list(set(ids) - set(subset))
-                if remain and rng.random() < 0.5:
-                    subset.append(int(rng.choice(remain)))
             else:
                 k = int(rng.integers(min_size, max_size + 1))
-                subset = list(map(int, rng.choice(ids, size=k, replace=False).tolist()))
             candidates.append(subset)
         # 3) Score using ViT

 import torch
 import torch.nn as nn
 from PIL import Image
+from huggingface_hub import hf_hub_download
 from utils.transforms import build_inference_transform
 from models.resnet_embedder import ResNetItemEmbedder
         model = ResNetItemEmbedder(embedding_dim=self.embed_dim)
         if strategy == "random":
             return model
+        # Try to download from Hugging Face Hub first
+        try:
+            print("🌐 Attempting to download ResNet from Hugging Face Hub...")
+            hf_path = hf_hub_download(
+                repo_id="Stylique/dressify-models",
+                filename="resnet_item_embedder_best.pth",
+                local_dir="models/exports",
+                local_dir_use_symlinks=False
+            )
+            print(f"📥 Downloaded ResNet from HF Hub: {hf_path}")
+            state = torch.load(hf_path, map_location="cpu")
+            state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
+            model.load_state_dict(state_dict, strict=False)
+            return model
+        except Exception as e:
+            print(f"❌ Failed to download ResNet from HF Hub: {e}")
+            print("⚠️ WARNING: Using untrained ResNet model!")
+            print("🚨 Recommendations will not be meaningful without trained weights!")
+        # Fallback to local checkpoints
         best_path = os.path.join(os.path.dirname(ckpt_path), "resnet_item_embedder_best.pth")
         if os.path.exists(best_path):
             ckpt_to_use = best_path
             ckpt_to_use = ckpt_path
         if os.path.exists(ckpt_to_use):
             state = torch.load(ckpt_to_use, map_location="cpu")
             state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
+            model.load_state_dict(state_dict, strict=False)
+            return model
         return model
     def _load_vit(self) -> nn.Module:
         model = OutfitCompatibilityModel(embedding_dim=self.embed_dim)
         if strategy == "random":
             return model
+        # Try to download from Hugging Face Hub first
+        try:
+            print("🌐 Attempting to download ViT from Hugging Face Hub...")
+            hf_path = hf_hub_download(
+                repo_id="Stylique/dressify-models",
+                filename="vit_outfit_model_best.pth",
+                local_dir="models/exports",
+                local_dir_use_symlinks=False
+            )
+            print(f"📥 Downloaded ViT from HF Hub: {hf_path}")
+            state = torch.load(hf_path, map_location="cpu")
+            state_dict = state.get("state_dict", state) if isinstance(state, dict) else state
+            model.load_state_dict(state_dict, strict=False)
+            return model
+        except Exception as e:
+            print(f"❌ Failed to download ViT from HF Hub: {e}")
+            print("⚠️ WARNING: Using untrained ViT model!")
+            print("🚨 Recommendations will not be meaningful without trained weights!")
+        # Fallback to local checkpoints
         best_path = os.path.join(os.path.dirname(ckpt_path), "vit_outfit_model_best.pth")
         ckpt_to_use = best_path if os.path.exists(best_path) else ckpt_path
         if os.path.exists(ckpt_to_use):
         min_size, max_size = 4, 6
         ids = list(range(len(proc_items)))
+        # Enhanced category-aware pools with diversity checks
         def cat_str(i: int) -> str:
             return (proc_items[i].get("category") or "").lower()
+        def get_category_type(cat: str) -> str:
+            """Map category to outfit slot type"""
+            if any(k in cat for k in ["top", "shirt", "tshirt", "blouse", "jacket", "hoodie", "sweater", "cardigan"]):
+                return "upper"
+            elif any(k in cat for k in ["pant", "trouser", "jean", "skirt", "short", "legging"]):
+                return "bottom"
+            elif any(k in cat for k in ["shoe", "sneaker", "boot", "heel", "sandal", "flat"]):
+                return "shoe"
+            elif any(k in cat for k in ["watch", "belt", "ring", "bracelet", "accessor", "bag", "hat", "scarf", "necklace"]):
+                return "accessory"
+            else:
+                return "other"
+        # Create category pools
+        uppers = [i for i in ids if get_category_type(cat_str(i)) == "upper"]
+        bottoms = [i for i in ids if get_category_type(cat_str(i)) == "bottom"]
+        shoes = [i for i in ids if get_category_type(cat_str(i)) == "shoe"]
+        accs = [i for i in ids if get_category_type(cat_str(i)) == "accessory"]
+        others = [i for i in ids if get_category_type(cat_str(i)) == "other"]
         candidates: List[List[int]] = []
         num_samples = max(num_outfits * 12, 24)
+        def has_category_diversity(subset: List[int]) -> bool:
+            """Check if subset has good category diversity"""
+            categories = [get_category_type(cat_str(i)) for i in subset]
+            unique_categories = set(categories)
+            # Require at least 3 different category types for good diversity
+            return len(unique_categories) >= 3
         for _ in range(num_samples):
             if uppers and bottoms and shoes and accs:
+                # Start with one item from each major category
                 subset = [
                     int(rng.choice(uppers)),
                     int(rng.choice(bottoms)),
                     int(rng.choice(shoes)),
                     int(rng.choice(accs)),
                 ]
+                # Add one more accessory or other item for variety
                 remain = list(set(ids) - set(subset))
+                if remain and rng.random() < 0.7:
+                    # Prefer accessories or other items
+                    pref_items = [i for i in remain if get_category_type(cat_str(i)) in ["accessory", "other"]]
+                    if pref_items:
+                        subset.append(int(rng.choice(pref_items)))
+                    else:
+                        subset.append(int(rng.choice(remain)))
             else:
+                # Fallback: ensure category diversity
                 k = int(rng.integers(min_size, max_size + 1))
+                attempts = 0
+                while attempts < 10:  # Try to find diverse subset
+                    subset = list(map(int, rng.choice(ids, size=k, replace=False).tolist()))
+                    if has_category_diversity(subset):
+                        break
+                    attempts += 1
+                # If we can't find diverse subset, use what we have
+                if attempts >= 10:
+                    subset = list(map(int, rng.choice(ids, size=k, replace=False).tolist()))
             candidates.append(subset)
         # 3) Score using ViT

train_resnet.py CHANGED Viewed

@@ -14,17 +14,20 @@ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 from data.polyvore import PolyvoreTripletDataset
 from models.resnet_embedder import ResNetItemEmbedder
 from utils.export import ensure_export_dir
 import json
 def parse_args() -> argparse.Namespace:
     p = argparse.ArgumentParser()
     p.add_argument("--data_root", type=str, default=os.getenv("POLYVORE_ROOT", "/home/user/app/data/Polyvore"))
-    p.add_argument("--epochs", type=int, default=20)
-    p.add_argument("--batch_size", type=int, default=8)
     p.add_argument("--lr", type=float, default=1e-3)
     p.add_argument("--embedding_dim", type=int, default=512)
     p.add_argument("--out", type=str, default="models/exports/resnet_item_embedder.pth")
     return p.parse_args()
@@ -80,8 +83,12 @@ def main() -> None:
     export_dir = ensure_export_dir(os.path.dirname(args.out) or "models/exports")
     best_loss = float("inf")
     history = []
     print(f"💾 Checkpoints will be saved to: {export_dir}")
     for epoch in range(args.epochs):
         model.train()
@@ -108,6 +115,14 @@ def main() -> None:
                 loss.backward()
                 optimizer.step()
                 running_loss += loss.item()
                 steps += 1
@@ -138,19 +153,53 @@ def main() -> None:
         history.append({"epoch": epoch + 1, "avg_triplet_loss": avg_loss})
-        if avg_loss < best_loss:
             best_loss = avg_loss
             best_path = os.path.join(export_dir, "resnet_item_embedder_best.pth")
             torch.save({"state_dict": model.state_dict(), "epoch": epoch+1, "loss": avg_loss}, best_path)
-            print(f"🏆 New best model saved: {best_path}")
-    # Write metrics
     metrics_path = os.path.join(export_dir, "resnet_metrics.json")
     with open(metrics_path, "w") as f:
-        json.dump({"best_triplet_loss": best_loss, "history": history}, f)
-    print(f"📊 Training completed! Best loss: {best_loss:.4f}")
-    print(f"📈 Metrics saved to: {metrics_path}")
 if __name__ == "__main__":

 from data.polyvore import PolyvoreTripletDataset
 from models.resnet_embedder import ResNetItemEmbedder
 from utils.export import ensure_export_dir
+from utils.advanced_metrics import AdvancedMetrics, calculate_triplet_metrics
 import json
 def parse_args() -> argparse.Namespace:
     p = argparse.ArgumentParser()
     p.add_argument("--data_root", type=str, default=os.getenv("POLYVORE_ROOT", "/home/user/app/data/Polyvore"))
+    p.add_argument("--epochs", type=int, default=50)
+    p.add_argument("--batch_size", type=int, default=16)
     p.add_argument("--lr", type=float, default=1e-3)
     p.add_argument("--embedding_dim", type=int, default=512)
     p.add_argument("--out", type=str, default="models/exports/resnet_item_embedder.pth")
+    p.add_argument("--early_stopping_patience", type=int, default=10, help="Early stopping patience")
+    p.add_argument("--min_delta", type=float, default=1e-4, help="Minimum change to qualify as improvement")
     return p.parse_args()
     export_dir = ensure_export_dir(os.path.dirname(args.out) or "models/exports")
     best_loss = float("inf")
     history = []
+    patience_counter = 0
+    best_epoch = 0
+    metrics_collector = AdvancedMetrics()
     print(f"💾 Checkpoints will be saved to: {export_dir}")
+    print(f"🛑 Early stopping patience: {args.early_stopping_patience} epochs")
     for epoch in range(args.epochs):
         model.train()
                 loss.backward()
                 optimizer.step()
+                # Collect metrics
+                triplet_metrics = calculate_triplet_metrics(emb_a, emb_p, emb_n, margin=0.2)
+                metrics_collector.add_batch(
+                    predictions=torch.ones(emb_a.size(0)),  # Placeholder for compatibility
+                    targets=torch.ones(emb_a.size(0)),      # Placeholder for compatibility
+                    embeddings=emb_a
+                )
                 running_loss += loss.item()
                 steps += 1
         history.append({"epoch": epoch + 1, "avg_triplet_loss": avg_loss})
+        # Early stopping logic
+        if avg_loss < best_loss - args.min_delta:
             best_loss = avg_loss
+            best_epoch = epoch + 1
+            patience_counter = 0
             best_path = os.path.join(export_dir, "resnet_item_embedder_best.pth")
             torch.save({"state_dict": model.state_dict(), "epoch": epoch+1, "loss": avg_loss}, best_path)
+            print(f"🏆 New best model saved: {best_path} (loss: {avg_loss:.4f})")
+        else:
+            patience_counter += 1
+            print(f"⏳ No improvement for {patience_counter} epochs (best: {best_loss:.4f} at epoch {best_epoch})")
+            if patience_counter >= args.early_stopping_patience:
+                print(f"🛑 Early stopping triggered after {patience_counter} epochs without improvement")
+                print(f"🏆 Best model was at epoch {best_epoch} with loss {best_loss:.4f}")
+                break
+    # Write comprehensive metrics
     metrics_path = os.path.join(export_dir, "resnet_metrics.json")
+    # Get advanced metrics
+    advanced_metrics = metrics_collector.calculate_all_metrics()
+    final_metrics = {
+        "best_triplet_loss": best_loss,
+        "best_epoch": best_epoch,
+        "total_epochs": epoch + 1,
+        "early_stopping_triggered": patience_counter >= args.early_stopping_patience,
+        "patience_counter": patience_counter,
+        "training_config": {
+            "epochs": args.epochs,
+            "batch_size": args.batch_size,
+            "learning_rate": args.lr,
+            "embedding_dim": args.embedding_dim,
+            "early_stopping_patience": args.early_stopping_patience,
+            "min_delta": args.min_delta
+        },
+        "history": history,
+        "advanced_metrics": advanced_metrics
+    }
     with open(metrics_path, "w") as f:
+        json.dump(final_metrics, f, indent=2)
+    print(f"📊 Training completed! Best loss: {best_loss:.4f} at epoch {best_epoch}")
+    print(f"📈 Comprehensive metrics saved to: {metrics_path}")
+    print(f"🔬 Advanced metrics: {advanced_metrics['summary']}")
 if __name__ == "__main__":

train_vit_triplet.py CHANGED Viewed

@@ -15,19 +15,22 @@ from data.polyvore import PolyvoreOutfitTripletDataset
 from models.vit_outfit import OutfitCompatibilityModel
 from models.resnet_embedder import ResNetItemEmbedder
 from utils.export import ensure_export_dir
 import json
 def parse_args() -> argparse.Namespace:
     p = argparse.ArgumentParser()
     p.add_argument("--data_root", type=str, default=os.getenv("POLYVORE_ROOT", "/home/user/app/data/Polyvore"))
-    p.add_argument("--epochs", type=int, default=30)
-    p.add_argument("--batch_size", type=int, default=8)
     p.add_argument("--lr", type=float, default=5e-4)
     p.add_argument("--embedding_dim", type=int, default=512)
     p.add_argument("--triplet_margin", type=float, default=0.3)
     p.add_argument("--export", type=str, default="models/exports/vit_outfit_model.pth")
     p.add_argument("--eval_every", type=int, default=1)
     return p.parse_args()
@@ -105,8 +108,12 @@ def main() -> None:
     export_dir = ensure_export_dir(os.path.dirname(args.export) or "models/exports")
     best_loss = float("inf")
     hist = []
     print(f"💾 Checkpoints will be saved to: {export_dir}")
     for epoch in range(args.epochs):
         model.train()
@@ -145,6 +152,16 @@ def main() -> None:
                 loss.backward()
                 optimizer.step()
                 running_loss += loss.item()
                 steps += 1
@@ -210,25 +227,58 @@ def main() -> None:
         if val_loss is not None:
             print(f"✅ Epoch {epoch+1}/{args.epochs} triplet_loss={avg_loss:.4f} val_triplet_loss={val_loss:.4f} saved -> {out_path}")
             hist.append({"epoch": epoch + 1, "triplet_loss": float(avg_loss), "val_triplet_loss": float(val_loss)})
-            if val_loss < best_loss:
                 best_loss = val_loss
                 best_path = os.path.join(export_dir, "vit_outfit_model_best.pth")
                 torch.save({"state_dict": model.state_dict(), "epoch": epoch+1, "loss": avg_loss, "val_loss": val_loss}, best_path)
-                print(f"🏆 New best model saved: {best_path}")
         else:
             print(f"✅ Epoch {epoch+1}/{args.epochs} triplet_loss={avg_loss:.4f} saved -> {out_path}")
             hist.append({"epoch": epoch + 1, "triplet_loss": float(avg_loss)})
-    # Write metrics
     metrics_path = os.path.join(export_dir, "vit_metrics.json")
-    payload = {"best_val_triplet_loss": best_loss if best_loss != float("inf") else None, "history": hist}
     with open(metrics_path, "w") as f:
-        json.dump(payload, f)
-    print(f"📊 Training completed!")
-    if best_loss != float("inf"):
-        print(f"🏆 Best validation loss: {best_loss:.4f}")
-    print(f"📈 Metrics saved to: {metrics_path}")
 if __name__ == "__main__":

 from models.vit_outfit import OutfitCompatibilityModel
 from models.resnet_embedder import ResNetItemEmbedder
 from utils.export import ensure_export_dir
+from utils.advanced_metrics import AdvancedMetrics, calculate_outfit_compatibility_metrics
 import json
 def parse_args() -> argparse.Namespace:
     p = argparse.ArgumentParser()
     p.add_argument("--data_root", type=str, default=os.getenv("POLYVORE_ROOT", "/home/user/app/data/Polyvore"))
+    p.add_argument("--epochs", type=int, default=50)
+    p.add_argument("--batch_size", type=int, default=16)
     p.add_argument("--lr", type=float, default=5e-4)
     p.add_argument("--embedding_dim", type=int, default=512)
     p.add_argument("--triplet_margin", type=float, default=0.3)
     p.add_argument("--export", type=str, default="models/exports/vit_outfit_model.pth")
     p.add_argument("--eval_every", type=int, default=1)
+    p.add_argument("--early_stopping_patience", type=int, default=10, help="Early stopping patience")
+    p.add_argument("--min_delta", type=float, default=1e-4, help="Minimum change to qualify as improvement")
     return p.parse_args()
     export_dir = ensure_export_dir(os.path.dirname(args.export) or "models/exports")
     best_loss = float("inf")
     hist = []
+    patience_counter = 0
+    best_epoch = 0
+    metrics_collector = AdvancedMetrics()
     print(f"💾 Checkpoints will be saved to: {export_dir}")
+    print(f"🛑 Early stopping patience: {args.early_stopping_patience} epochs")
     for epoch in range(args.epochs):
         model.train()
                 loss.backward()
                 optimizer.step()
+                # Collect metrics
+                compatibility_metrics = calculate_outfit_compatibility_metrics(
+                    torch.cat([ea, ep, en], dim=0),
+                    torch.cat([torch.ones(ea.size(0)), torch.ones(ep.size(0)), torch.zeros(en.size(0))], dim=0)
+                )
+                metrics_collector.add_batch(
+                    predictions=torch.cat([ea, ep, en], dim=0),
+                    targets=torch.cat([torch.ones(ea.size(0)), torch.ones(ep.size(0)), torch.zeros(en.size(0))], dim=0)
+                )
                 running_loss += loss.item()
                 steps += 1
         if val_loss is not None:
             print(f"✅ Epoch {epoch+1}/{args.epochs} triplet_loss={avg_loss:.4f} val_triplet_loss={val_loss:.4f} saved -> {out_path}")
             hist.append({"epoch": epoch + 1, "triplet_loss": float(avg_loss), "val_triplet_loss": float(val_loss)})
+            # Early stopping logic
+            if val_loss < best_loss - args.min_delta:
                 best_loss = val_loss
+                best_epoch = epoch + 1
+                patience_counter = 0
                 best_path = os.path.join(export_dir, "vit_outfit_model_best.pth")
                 torch.save({"state_dict": model.state_dict(), "epoch": epoch+1, "loss": avg_loss, "val_loss": val_loss}, best_path)
+                print(f"🏆 New best model saved: {best_path} (val_loss: {val_loss:.4f})")
+            else:
+                patience_counter += 1
+                print(f"⏳ No improvement for {patience_counter} epochs (best: {best_loss:.4f} at epoch {best_epoch})")
+                if patience_counter >= args.early_stopping_patience:
+                    print(f"🛑 Early stopping triggered after {patience_counter} epochs without improvement")
+                    print(f"🏆 Best model was at epoch {best_epoch} with val_loss {best_loss:.4f}")
+                    break
         else:
             print(f"✅ Epoch {epoch+1}/{args.epochs} triplet_loss={avg_loss:.4f} saved -> {out_path}")
             hist.append({"epoch": epoch + 1, "triplet_loss": float(avg_loss)})
+    # Write comprehensive metrics
     metrics_path = os.path.join(export_dir, "vit_metrics.json")
+    # Get advanced metrics
+    advanced_metrics = metrics_collector.calculate_all_metrics()
+    final_metrics = {
+        "best_val_triplet_loss": best_loss if best_loss != float("inf") else None,
+        "best_epoch": best_epoch,
+        "total_epochs": epoch + 1,
+        "early_stopping_triggered": patience_counter >= args.early_stopping_patience,
+        "patience_counter": patience_counter,
+        "training_config": {
+            "epochs": args.epochs,
+            "batch_size": args.batch_size,
+            "learning_rate": args.lr,
+            "embedding_dim": args.embedding_dim,
+            "triplet_margin": args.triplet_margin,
+            "early_stopping_patience": args.early_stopping_patience,
+            "min_delta": args.min_delta
+        },
+        "history": hist,
+        "advanced_metrics": advanced_metrics
+    }
     with open(metrics_path, "w") as f:
+        json.dump(final_metrics, f, indent=2)
+    print(f"📊 Training completed! Best val_loss: {best_loss:.4f} at epoch {best_epoch}")
+    print(f"📈 Comprehensive metrics saved to: {metrics_path}")
+    print(f"🔬 Advanced metrics: {advanced_metrics['summary']}")
 if __name__ == "__main__":

utils/advanced_metrics.py ADDED Viewed

	@@ -0,0 +1,287 @@

+"""
+Advanced metrics calculation for outfit recommendation system.
+Includes accuracy, precision, recall, F1 score, and other research-grade metrics.
+"""
+import numpy as np
+import torch
+import torch.nn.functional as F
+from typing import Dict, List, Any, Tuple
+from sklearn.metrics import accuracy_score, precision_recall_fscore_support, roc_auc_score
+import json
+from pathlib import Path
+class AdvancedMetrics:
+    """Calculate comprehensive metrics for outfit recommendation models."""
+    def __init__(self):
+        self.reset()
+    def reset(self):
+        """Reset all metrics."""
+        self.predictions = []
+        self.targets = []
+        self.scores = []
+        self.embeddings = []
+        self.outfit_scores = []
+    def add_batch(self, predictions: torch.Tensor, targets: torch.Tensor,
+                  scores: torch.Tensor = None, embeddings: torch.Tensor = None):
+        """Add a batch of predictions and targets."""
+        self.predictions.extend(predictions.cpu().numpy())
+        self.targets.extend(targets.cpu().numpy())
+        if scores is not None:
+            self.scores.extend(scores.cpu().numpy())
+        if embeddings is not None:
+            self.embeddings.extend(embeddings.cpu().numpy())
+    def add_outfit_scores(self, outfit_scores: List[float]):
+        """Add outfit compatibility scores."""
+        self.outfit_scores.extend(outfit_scores)
+    def calculate_classification_metrics(self) -> Dict[str, float]:
+        """Calculate classification metrics."""
+        if not self.predictions or not self.targets:
+            return {}
+        preds = np.array(self.predictions)
+        targets = np.array(self.targets)
+        # Convert to binary if needed
+        if preds.max() > 1:
+            preds = (preds > 0.5).astype(int)
+        if targets.max() > 1:
+            targets = (targets > 0.5).astype(int)
+        accuracy = accuracy_score(targets, preds)
+        precision, recall, f1, _ = precision_recall_fscore_support(
+            targets, preds, average='weighted', zero_division=0
+        )
+        # Calculate per-class metrics
+        precision_macro, recall_macro, f1_macro, _ = precision_recall_fscore_support(
+            targets, preds, average='macro', zero_division=0
+        )
+        # Calculate AUC if we have scores
+        auc = None
+        if self.scores:
+            try:
+                scores_array = np.array(self.scores)
+                if len(np.unique(targets)) > 1:  # Need both classes for AUC
+                    auc = roc_auc_score(targets, scores_array)
+            except ValueError:
+                auc = None
+        return {
+            "accuracy": float(accuracy),
+            "precision_weighted": float(precision),
+            "recall_weighted": float(recall),
+            "f1_weighted": float(f1),
+            "precision_macro": float(precision_macro),
+            "recall_macro": float(recall_macro),
+            "f1_macro": float(f1_macro),
+            "auc": float(auc) if auc is not None else None
+        }
+    def calculate_embedding_metrics(self) -> Dict[str, float]:
+        """Calculate embedding quality metrics."""
+        if not self.embeddings:
+            return {}
+        embeddings = np.array(self.embeddings)
+        # Calculate embedding statistics
+        mean_norm = np.mean(np.linalg.norm(embeddings, axis=1))
+        std_norm = np.std(np.linalg.norm(embeddings, axis=1))
+        # Calculate intra-class and inter-class distances
+        if len(self.targets) > 1:
+            targets = np.array(self.targets)
+            unique_classes = np.unique(targets)
+            intra_class_distances = []
+            inter_class_distances = []
+            for class_label in unique_classes:
+                class_embeddings = embeddings[targets == class_label]
+                if len(class_embeddings) > 1:
+                    # Intra-class distances
+                    for i in range(len(class_embeddings)):
+                        for j in range(i + 1, len(class_embeddings)):
+                            dist = np.linalg.norm(class_embeddings[i] - class_embeddings[j])
+                            intra_class_distances.append(dist)
+                # Inter-class distances
+                other_embeddings = embeddings[targets != class_label]
+                if len(other_embeddings) > 0:
+                    for class_emb in class_embeddings:
+                        for other_emb in other_embeddings:
+                            dist = np.linalg.norm(class_emb - other_emb)
+                            inter_class_distances.append(dist)
+            avg_intra_class = np.mean(intra_class_distances) if intra_class_distances else 0
+            avg_inter_class = np.mean(inter_class_distances) if inter_class_distances else 0
+            # Separation ratio (higher is better)
+            separation_ratio = avg_inter_class / (avg_intra_class + 1e-8)
+        else:
+            avg_intra_class = 0
+            avg_inter_class = 0
+            separation_ratio = 0
+        return {
+            "embedding_mean_norm": float(mean_norm),
+            "embedding_std_norm": float(std_norm),
+            "avg_intra_class_distance": float(avg_intra_class),
+            "avg_inter_class_distance": float(avg_inter_class),
+            "separation_ratio": float(separation_ratio)
+        }
+    def calculate_outfit_metrics(self) -> Dict[str, float]:
+        """Calculate outfit-specific metrics."""
+        if not self.outfit_scores:
+            return {}
+        scores = np.array(self.outfit_scores)
+        return {
+            "outfit_score_mean": float(np.mean(scores)),
+            "outfit_score_std": float(np.std(scores)),
+            "outfit_score_min": float(np.min(scores)),
+            "outfit_score_max": float(np.max(scores)),
+            "outfit_score_median": float(np.median(scores))
+        }
+    def calculate_all_metrics(self) -> Dict[str, Any]:
+        """Calculate all available metrics."""
+        metrics = {
+            "classification": self.calculate_classification_metrics(),
+            "embeddings": self.calculate_embedding_metrics(),
+            "outfits": self.calculate_outfit_metrics()
+        }
+        # Add summary statistics
+        metrics["summary"] = {
+            "total_predictions": len(self.predictions),
+            "total_targets": len(self.targets),
+            "total_scores": len(self.scores),
+            "total_embeddings": len(self.embeddings),
+            "total_outfit_scores": len(self.outfit_scores)
+        }
+        return metrics
+    def save_metrics(self, filepath: str, additional_info: Dict[str, Any] = None):
+        """Save metrics to JSON file."""
+        metrics = self.calculate_all_metrics()
+        if additional_info:
+            metrics["additional_info"] = additional_info
+        # Ensure directory exists
+        Path(filepath).parent.mkdir(parents=True, exist_ok=True)
+        with open(filepath, 'w') as f:
+            json.dump(metrics, f, indent=2)
+        return metrics
+def calculate_triplet_metrics(anchor_emb: torch.Tensor, positive_emb: torch.Tensor,
+                            negative_emb: torch.Tensor, margin: float = 0.2) -> Dict[str, float]:
+    """Calculate triplet-specific metrics."""
+    # Calculate distances
+    pos_dist = F.pairwise_distance(anchor_emb, positive_emb, p=2)
+    neg_dist = F.pairwise_distance(anchor_emb, negative_emb, p=2)
+    # Triplet loss
+    triplet_loss = F.relu(pos_dist - neg_dist + margin).mean()
+    # Accuracy: positive distance < negative distance
+    correct = (pos_dist < neg_dist).float().mean()
+    # Margin violations
+    margin_violations = (pos_dist - neg_dist + margin > 0).float().mean()
+    # Distance statistics
+    pos_dist_mean = pos_dist.mean()
+    neg_dist_mean = neg_dist.mean()
+    distance_ratio = neg_dist_mean / (pos_dist_mean + 1e-8)
+    return {
+        "triplet_loss": float(triplet_loss),
+        "triplet_accuracy": float(correct),
+        "margin_violations": float(margin_violations),
+        "positive_distance_mean": float(pos_dist_mean),
+        "negative_distance_mean": float(neg_dist_mean),
+        "distance_ratio": float(distance_ratio)
+    }
+def calculate_outfit_compatibility_metrics(outfit_scores: torch.Tensor,
+                                         labels: torch.Tensor) -> Dict[str, float]:
+    """Calculate outfit compatibility specific metrics."""
+    # Convert to numpy for sklearn compatibility
+    scores_np = outfit_scores.cpu().numpy()
+    labels_np = labels.cpu().numpy()
+    # Binary classification metrics
+    pred_binary = (scores_np > 0.5).astype(int)
+    accuracy = accuracy_score(labels_np, pred_binary)
+    precision, recall, f1, _ = precision_recall_fscore_support(
+        labels_np, pred_binary, average='weighted', zero_division=0
+    )
+    # AUC if we have both classes
+    auc = None
+    if len(np.unique(labels_np)) > 1:
+        try:
+            auc = roc_auc_score(labels_np, scores_np)
+        except ValueError:
+            auc = None
+    # Score distribution metrics
+    compatible_scores = scores_np[labels_np == 1]
+    incompatible_scores = scores_np[labels_np == 0]
+    return {
+        "compatibility_accuracy": float(accuracy),
+        "compatibility_precision": float(precision),
+        "compatibility_recall": float(recall),
+        "compatibility_f1": float(f1),
+        "compatibility_auc": float(auc) if auc is not None else None,
+        "compatible_score_mean": float(np.mean(compatible_scores)) if len(compatible_scores) > 0 else 0,
+        "incompatible_score_mean": float(np.mean(incompatible_scores)) if len(incompatible_scores) > 0 else 0,
+        "score_separation": float(np.mean(compatible_scores) - np.mean(incompatible_scores)) if len(compatible_scores) > 0 and len(incompatible_scores) > 0 else 0
+    }
+if __name__ == "__main__":
+    # Example usage
+    metrics = AdvancedMetrics()
+    # Simulate some data
+    predictions = torch.randn(100, 1)
+    targets = torch.randint(0, 2, (100, 1)).float()
+    scores = torch.sigmoid(predictions)
+    embeddings = torch.randn(100, 512)
+    metrics.add_batch(predictions, targets, scores, embeddings)
+    metrics.add_outfit_scores(scores.flatten().tolist())
+    # Calculate and save metrics
+    all_metrics = metrics.calculate_all_metrics()
+    print("Calculated metrics:")
+    print(json.dumps(all_metrics, indent=2))
+    # Save to file
+    metrics.save_metrics("test_metrics.json", {"model": "test", "epoch": 1})

utils/hf_utils.py CHANGED Viewed

@@ -130,6 +130,88 @@ class HFModelManager:
         except Exception as e:
             print(f"Failed to list repo files: {e}")
             return []
 def push_model_to_hub(

         except Exception as e:
             print(f"Failed to list repo files: {e}")
             return []
+    def upload_model(self, model_type: str, repo_name: str) -> Dict[str, Any]:
+        """Upload models or data to HF Hub based on type."""
+        try:
+            if model_type == "models":
+                # Upload model checkpoints
+                repo_id = f"{self.username}/{repo_name}"
+                self.create_model_repo(repo_name, private=False)
+                # Upload best model checkpoints
+                model_files = [
+                    "models/exports/resnet_item_embedder_best.pth",
+                    "models/exports/vit_outfit_model_best.pth",
+                    "models/exports/resnet_metrics.json",
+                    "models/exports/vit_metrics.json"
+                ]
+                uploaded_files = []
+                for file_path in model_files:
+                    if os.path.exists(file_path):
+                        success = self.push_checkpoint(file_path, repo_id, f"Upload {os.path.basename(file_path)}")
+                        if success:
+                            uploaded_files.append(os.path.basename(file_path))
+                return {"success": True, "uploaded_files": uploaded_files, "repo_id": repo_id}
+            elif model_type == "splits":
+                # Upload dataset splits
+                repo_id = f"{self.username}/{repo_name}"
+                try:
+                    create_repo(
+                        repo_id=repo_id,
+                        repo_type="dataset",
+                        private=False,
+                        exist_ok=True
+                    )
+                except Exception as e:
+                    print(f"Note: Repo might already exist: {e}")
+                # Upload split files
+                split_files = [
+                    "data/Polyvore/splits/train.json",
+                    "data/Polyvore/splits/valid.json",
+                    "data/Polyvore/splits/test.json",
+                    "data/Polyvore/splits/outfit_triplets_train.json",
+                    "data/Polyvore/splits/outfit_triplets_valid.json",
+                    "data/Polyvore/splits/outfit_triplets_test.json"
+                ]
+                uploaded_files = []
+                for file_path in split_files:
+                    if os.path.exists(file_path):
+                        try:
+                            upload_file(
+                                path_or_fileobj=file_path,
+                                path_in_repo=f"splits/{os.path.basename(file_path)}",
+                                repo_id=repo_id,
+                                repo_type="dataset",
+                                commit_message=f"Upload {os.path.basename(file_path)}"
+                            )
+                            uploaded_files.append(os.path.basename(file_path))
+                        except Exception as e:
+                            print(f"Failed to upload {file_path}: {e}")
+                return {"success": True, "uploaded_files": uploaded_files, "repo_id": repo_id}
+            elif model_type == "everything":
+                # Upload everything
+                models_result = self.upload_model("models", "dressify-models")
+                splits_result = self.upload_model("splits", "Dressify-Helper")
+                return {
+                    "success": models_result["success"] and splits_result["success"],
+                    "models": models_result,
+                    "splits": splits_result
+                }
+            else:
+                return {"success": False, "error": f"Unknown model type: {model_type}"}
+        except Exception as e:
+            return {"success": False, "error": str(e)}
 def push_model_to_hub(

utils/triplet_mining.py CHANGED Viewed

@@ -281,3 +281,4 @@ if __name__ == "__main__":
     print(f"Anchor indices: {anchors[:5]}")
     print(f"Positive indices: {positives[:5]}")
     print(f"Negative indices: {negatives[:5]}")

     print(f"Anchor indices: {anchors[:5]}")
     print(f"Positive indices: {positives[:5]}")
     print(f"Negative indices: {negatives[:5]}")