Deploy from GitHub main
This view is limited to 50 files because it contains too many changes; see the raw diff for the complete changeset.
- .gitattributes +2 -33
- .github/workflows/deploy-to-hf-spaces.yml +52 -0
- .gitignore +103 -0
- DEPLOYMENT_CHECKLIST.md +147 -0
- README.md +366 -9
- docs/PHASE1D_EVALUATION_GUIDE.md +205 -0
- docs/PHASE1E_LORA_GUIDE.md +270 -0
- docs/PHASE1_USAGE.md +454 -0
- docs/PHASE2A_AUDIO_TO_IMAGE_GUIDE.md +307 -0
- docs/PHASE2B_DATA_TO_IMAGE_GUIDE.md +271 -0
- docs/PHASE2C_EMOTION_TO_IMAGE_GUIDE.md +286 -0
- docs/PHASE2D_REALTIME_DATA_TO_IMAGE_GUIDE.md +337 -0
- docs/PHASE2E_STYLE_REFERENCE_GUIDE.md +271 -0
- docs/PHASE3E_PERFORMANCE_GUIDE.md +301 -0
- docs/PHASE3_FINAL_DASHBOARD_GUIDE.md +274 -0
- docs/PHASE4_DEPLOYMENT_GUIDE.md +110 -0
- docs/PHASE4_RUNTIME_OPTIMIZATION.md +111 -0
- docs/PROJECT_STRUCTURE.md +158 -0
- docs/README.md +38 -0
- packages.txt +3 -0
- requirements.txt +73 -0
- run_phase3_final_dashboard.py +299 -0
- src/__init__.py +7 -0
- src/config.py +42 -0
- src/generators/__init__.py +26 -0
- src/generators/compi_phase1_advanced.py +230 -0
- src/generators/compi_phase1_text2image.py +117 -0
- src/generators/compi_phase1b_advanced_styling.py +338 -0
- src/generators/compi_phase1b_styled_generation.py +172 -0
- src/generators/compi_phase1d_cli_evaluation.py +341 -0
- src/generators/compi_phase1d_evaluate_quality.py +496 -0
- src/generators/compi_phase1e_dataset_prep.py +329 -0
- src/generators/compi_phase1e_lora_training.py +458 -0
- src/generators/compi_phase1e_style_generation.py +406 -0
- src/generators/compi_phase1e_style_manager.py +386 -0
- src/generators/compi_phase2a_audio_to_image.py +350 -0
- src/generators/compi_phase2b_data_to_image.py +432 -0
- src/generators/compi_phase2c_emotion_to_image.py +408 -0
- src/generators/compi_phase2d_realtime_to_image.py +483 -0
- src/generators/compi_phase2e_refimg_to_image.py +578 -0
- src/setup_env.py +118 -0
- src/ui/__init__.py +14 -0
- src/ui/compi_phase3_final_dashboard.py +1709 -0
- src/utils/__init__.py +16 -0
- src/utils/audio_utils.py +342 -0
- src/utils/data_utils.py +654 -0
- src/utils/emotion_utils.py +446 -0
- src/utils/file_utils.py +149 -0
- src/utils/image_utils.py +309 -0
- src/utils/logging_utils.py +45 -0
.gitattributes
CHANGED

@@ -1,35 +1,4 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
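
The trimmed `.gitattributes` keeps LFS tracking only for model-weight extensions, so any other large file would travel as a plain git object. As a minimal pre-push sketch, assuming roughly 10 MB as the comfort threshold for non-LFS files on the Hub (the threshold and helper name are illustrative assumptions, not anything this commit defines):

```python
# Sketch: flag files above a size threshold that the LFS patterns above
# do not cover. LIMIT is an assumed guideline, not a documented constant.
from pathlib import Path

LFS_SUFFIXES = {".pt", ".bin", ".safetensors", ".ckpt"}
LIMIT = 10 * 1024 * 1024  # assumed ~10 MB comfort limit for plain git files

def find_untracked_large_files(root: str = ".") -> list[Path]:
    """Return files above LIMIT whose suffix is not LFS-tracked."""
    return [
        p for p in Path(root).rglob("*")
        if p.is_file()
        and p.suffix not in LFS_SUFFIXES
        and p.stat().st_size > LIMIT
    ]

if __name__ == "__main__":
    for f in find_untracked_large_files():
        print(f"Consider LFS or .gitignore for: {f}")
```

Running this before a push gives an early warning that a new pattern may belong in `.gitattributes`.
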
.github/workflows/deploy-to-hf-spaces.yml
ADDED

name: Deploy to HF Spaces (on main)

on:
  push:
    branches: [main]
  workflow_dispatch: {}

concurrency:
  group: deploy-hf-space
  cancel-in-progress: true

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"

      - name: Install deps
        run: |
          python -m pip install -U pip
          pip install -U "huggingface_hub[cli]"

      - name: Validate app file exists
        run: |
          test -f src/ui/compi_phase3_final_dashboard.py || (echo "App file missing" && exit 1)

      - name: Push folder to Space
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          HF_SPACE_ID: ${{ secrets.HF_SPACE_ID }}
        run: |
          python - << 'PY'
          from huggingface_hub import HfApi
          import os
          api = HfApi()
          repo_id = os.environ["HF_SPACE_ID"]  # e.g. "username/compi-final-dashboard"
          api.upload_folder(
              token=os.environ["HF_TOKEN"],
              repo_id=repo_id,
              repo_type="space",
              folder_path=".",
              path_in_repo="",
              commit_message="Deploy from GitHub main"
          )
          print("Uploaded to Space:", repo_id)
          PY
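
One design note on the upload step: `upload_folder` pushes the checkout as given and is not guaranteed to honor `.gitignore`, so virtual environments or generated outputs present in the runner's workspace could end up in the Space. A sketch of the same call with explicit filters via `upload_folder`'s `ignore_patterns` parameter; the pattern list is an illustrative assumption, not part of the workflow above:

```python
# Sketch: the deploy step with explicit upload filters.
# The ignore_patterns values here are illustrative assumptions.
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
api.upload_folder(
    repo_id=os.environ["HF_SPACE_ID"],
    repo_type="space",
    folder_path=".",
    ignore_patterns=[".git/*", "compi-env/*", "outputs/*", "wandb/*"],
    commit_message="Deploy from GitHub main",
)
```
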
.gitignore
ADDED

# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# Virtual Environments
compi-env/
venv/
env/
ENV/
env.bak/
venv.bak/

# IDE
.vscode/
.idea/
*.swp
*.swo
*~

# Jupyter Notebook
.ipynb_checkpoints

# Environment variables
.env
.env.local
.env.*.local

# Data files (large datasets)
data/raw/
data/processed/
*.csv
*.json
*.parquet
*.h5
*.hdf5

# Model files (large pretrained models)
models/pretrained/
*.pth
*.pt
*.ckpt
*.safetensors

# Generated outputs
outputs/images/
outputs/audio/
outputs/videos/
*.wav
*.mp3
*.mp4
*.avi
*.png
*.jpg
*.jpeg
*.gif

# Logs
logs/
*.log

# Cache
.cache/
*.cache

# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

# Temporary files
*.tmp
*.temp
temp/
tmp/

# Weights & Biases
wandb/

# Hugging Face cache
.cache/huggingface/
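
If you want the deploy step and this ignore list to stay in sync, one option is to derive the upload filters from the file itself. A small sketch under the assumption that the file sticks to the simple one-pattern-per-line form above; gitignore and fnmatch semantics differ for directory patterns, so this is only an approximation:

```python
# Sketch: turn simple .gitignore lines into an ignore_patterns list.
# Assumes plain patterns like those above; "!" negations are not handled,
# and "dir/" entries only match approximately under fnmatch rules.
from pathlib import Path

def gitignore_patterns(path: str = ".gitignore") -> list[str]:
    """Collect non-comment, non-blank lines for use as upload filters."""
    lines = Path(path).read_text(encoding="utf-8").splitlines()
    return [ln.strip() for ln in lines if ln.strip() and not ln.lstrip().startswith("#")]

if __name__ == "__main__":
    print(gitignore_patterns())
```
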
DEPLOYMENT_CHECKLIST.md
ADDED

# 🚀 CompI Phase 4 Deployment Checklist

## Prerequisites

- [ ] GitHub account with your CompI repository
- [ ] Hugging Face account ([sign up here](https://huggingface.co/join))
- [ ] OpenWeatherMap API key (optional, for real-time weather data)

## Phase 4.A: Repository Preparation ✅

- [x] `packages.txt` created with system dependencies
- [x] `.gitattributes` created for Git LFS support
- [x] `requirements.txt` verified and ready
- [x] GitHub Actions workflow created

## Phase 4.B: Hugging Face Space Setup

### Step 1: Create Space

1. [ ] Go to [Hugging Face Spaces](https://huggingface.co/spaces)
2. [ ] Click "Create new Space"
3. [ ] Fill in details:
   - **Owner**: Your username
   - **Space name**: `compi-final-dashboard` (or your choice)
   - **License**: Apache 2.0 (recommended)
   - **SDK**: Streamlit
   - **Hardware**: CPU basic (free)
   - **Visibility**: Public

### Step 2: Configure Space

4. [ ] In your new Space, click "Files" → "README.md" → Edit
5. [ ] Replace content with this header:

```markdown
---
title: CompI — Final Dashboard
emoji: 🎨
sdk: streamlit
app_file: src/ui/compi_phase3_final_dashboard.py
pinned: false
---

# CompI - Multimodal AI Art Generation Platform

The ultimate creative platform combining text, audio, data, emotion, and real-time inputs for AI art generation.

## Features

🧩 **Multimodal Inputs** - Text, Audio, Data, Emotion, Real-time feeds
🖼️ **Advanced References** - Multi-image upload with role assignment
⚙️ **Model Management** - SD 1.5/SDXL switching, LoRA integration
🖼️ **Professional Gallery** - Filtering, rating, annotation system
💾 **Preset Management** - Save/load complete configurations
📦 **Export System** - Complete bundles with metadata

## Usage

1. Configure your inputs in the "Inputs" tab
2. Upload reference images in "Advanced References"
3. Choose your model and performance settings
4. Generate with intelligent fusion of all inputs
5. Review results in the gallery and export bundles

Built with Streamlit, PyTorch, and Diffusers.
```

6. [ ] Commit the README changes

### Step 3: Add Secrets (Optional)

7. [ ] Go to Space Settings → Repository secrets
8. [ ] Add secret: `OPENWEATHER_KEY` = `your_api_key_here`

## Phase 4.C: GitHub Actions Setup

### Step 1: Get Hugging Face Token

9. [ ] Go to [HF Settings → Access Tokens](https://huggingface.co/settings/tokens)
10. [ ] Click "New token"
11. [ ] Name: `CompI Deployment`
12. [ ] Type: **Write**
13. [ ] Click "Generate"
14. [ ] **Copy the token** (you won't see it again!)

### Step 2: Add GitHub Secrets

15. [ ] Go to your GitHub repo → Settings → Secrets and variables → Actions
16. [ ] Click "New repository secret"
17. [ ] Add secret 1:
    - **Name**: `HF_TOKEN`
    - **Secret**: Paste your HF token from step 14
18. [ ] Add secret 2:
    - **Name**: `HF_SPACE_ID`
    - **Secret**: `your-username/your-space-name` (e.g., `AXRZCE/compi-final-dashboard`)

## Phase 4.D: Test Deployment

### Step 1: Trigger First Deploy

19. [ ] In your GitHub repo, make a small change (e.g., edit README.md)
20. [ ] Commit to the `main` branch:

```bash
git add .
git commit -m "Initial deployment setup"
git push origin main
```

### Step 2: Monitor Deployment

21. [ ] Go to GitHub repo → Actions tab
22. [ ] Watch the "Deploy to HF Spaces (on main)" workflow
23. [ ] Verify it completes successfully (green checkmark)

### Step 3: Test Your Space

24. [ ] Go to your HF Space URL: `https://your-username-your-space.hf.space`
25. [ ] Wait for the app to build (the first build takes 5-10 minutes)
26. [ ] Test basic functionality:
    - [ ] Enter a text prompt
    - [ ] Generate an image
    - [ ] Check that the interface loads properly

## Phase 4.E: Production Workflow

### For Future Updates

- [ ] Create feature branches for new development
- [ ] Test changes locally: `streamlit run src/ui/compi_phase3_final_dashboard.py`
- [ ] Open a Pull Request to `main`
- [ ] Merge the PR → automatic deployment to the HF Space

### Rollback Process

- [ ] If a deployment breaks, revert the commit on `main`
- [ ] GitHub Actions will automatically redeploy the previous version

## Troubleshooting

### Common Issues

- **Space won't start**: Check the build logs in HF Space → Logs
- **GitHub Action fails**: Check that the repo secrets are set correctly
- **App crashes**: Verify the `app_file` path in the Space README is correct
- **Missing dependencies**: Check `requirements.txt` and `packages.txt`

### Support Resources

- [Hugging Face Spaces Documentation](https://huggingface.co/docs/hub/spaces)
- [GitHub Actions Documentation](https://docs.github.com/en/actions)
- [Streamlit Documentation](https://docs.streamlit.io/)

## Success! 🎉

Once complete, your CompI platform will be:

- ✅ Publicly accessible at your HF Space URL
- ✅ Automatically deployed on every `main` branch update
- ✅ Running on free Hugging Face infrastructure
- ✅ Ready for users worldwide to create multimodal AI art

**Share your Space URL**: `https://your-username-your-space.hf.space`
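
Steps 21-23 watch the GitHub Action, but the Space build itself can also be polled from Python. A sketch of a post-deploy check, assuming your installed `huggingface_hub` version exposes `HfApi.get_space_runtime` (present in recent releases):

```python
# Sketch: poll the Space build stage after a deploy.
# Assumes a recent huggingface_hub with HfApi.get_space_runtime.
import os

from huggingface_hub import HfApi

api = HfApi(token=os.environ["HF_TOKEN"])
runtime = api.get_space_runtime(os.environ["HF_SPACE_ID"])
print("Space stage:", runtime.stage)  # e.g. "BUILDING" or "RUNNING"
```
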
README.md
CHANGED

Removed (the previous static-Space front matter):

---
title: Comp I
emoji: 🔥
colorFrom: green
colorTo: yellow
sdk: static
pinned: false
license: mit
---

Added (the new project README):

# CompI - Compositional Intelligence Project

A multi-modal AI system that generates creative content by combining text, images, audio, and emotional context.

Note: All documentation has been consolidated under docs/. See docs/README.md for an index of guides.

## 🚀 Project Overview

CompI (Compositional Intelligence) is designed to create rich, contextually-aware content by:

- Processing text prompts with emotional analysis
- Generating images using Stable Diffusion
- Creating audio compositions
- Combining multiple modalities for enhanced creative output

## 📁 Project Structure

```
Project CompI/
├── src/                    # Source code
│   ├── generators/         # Image generation modules
│   ├── models/             # Model implementations
│   ├── utils/              # Utility functions
│   ├── data/               # Data processing
│   ├── ui/                 # User interface components
│   └── setup_env.py        # Environment setup script
├── notebooks/              # Jupyter notebooks for experimentation
├── data/                   # Dataset storage
├── outputs/                # Generated content
├── tests/                  # Unit tests
├── run_*.py                # Convenience scripts for generators
├── requirements.txt        # Python dependencies
└── README.md               # This file
```

## 🛠️ Setup Instructions

### 1. Create Virtual Environment

```bash
# Using conda (recommended for ML projects)
conda create -n compi-env python=3.10 -y
conda activate compi-env

# OR using venv
python -m venv compi-env
# Windows
compi-env\Scripts\activate
# Linux/Mac
source compi-env/bin/activate
```

### 2. Install Dependencies

**For GPU users (recommended for faster generation):**

```bash
# First, check your CUDA version
nvidia-smi

# Install PyTorch with CUDA support first (replace cu121 with your CUDA version)
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

# Then install remaining requirements
pip install -r requirements.txt
```

**For CPU-only users:**

```bash
pip install -r requirements.txt
```

### 3. Test Installation

```bash
python src/test_setup.py
```

## 🚀 Quick Start

### Phase 1: Text-to-Image Generation

```bash
# Basic text-to-image generation
python run_basic_generation.py "A magical forest, digital art"

# Advanced generation with style conditioning
python run_advanced_styling.py "dragon in a crystal cave" --style "oil painting" --mood "dramatic"

# Interactive style selection
python run_styled_generation.py

# Quality evaluation and analysis
python run_evaluation.py

# Personal style training with LoRA
python run_lora_training.py --dataset-dir datasets/my_style

# Generate with personal style
python run_style_generation.py --lora-path lora_models/my_style/checkpoint-1000 "artwork in my_style"
```

### Phase 2.A: Audio-to-Image Generation 🎵

```bash
# Install audio processing dependencies
pip install openai-whisper

# Streamlit UI (Recommended)
streamlit run src/ui/compi_phase2a_streamlit_ui.py

# Command line generation
python run_phase2a_audio_to_image.py --prompt "mystical forest" --audio "music.mp3"

# Interactive mode
python run_phase2a_audio_to_image.py --interactive

# Test installation
python src/test_phase2a.py

# Run examples
python examples/phase2a_audio_examples.py --example all
```

### Phase 2.B: Data/Logic-to-Image Generation 📊

```bash
# Streamlit UI (Recommended)
streamlit run src/ui/compi_phase2b_streamlit_ui.py

# Command line generation with CSV data
python run_phase2b_data_to_image.py --prompt "data visualization" --csv "data.csv"

# Mathematical formula generation
python run_phase2b_data_to_image.py --prompt "mathematical harmony" --formula "np.sin(np.linspace(0, 4*np.pi, 100))"

# Batch processing
python run_phase2b_data_to_image.py --batch-csv "data_folder/" --prompt "scientific patterns"

# Interactive mode
python run_phase2b_data_to_image.py --interactive
```

### Phase 2.C: Emotional/Contextual Input to Image Generation 🌀

```bash
# Streamlit UI (Recommended)
streamlit run src/ui/compi_phase2c_streamlit_ui.py

# Command line generation with preset emotion
python run_phase2c_emotion_to_image.py --prompt "mystical forest" --emotion "mysterious"

# Custom emotion generation
python run_phase2c_emotion_to_image.py --prompt "urban landscape" --emotion "🤩" --type custom

# Descriptive emotion generation
python run_phase2c_emotion_to_image.py --prompt "mountain vista" --emotion "I feel a sense of wonder" --type text

# Batch emotion processing
python run_phase2c_emotion_to_image.py --batch-emotions "joyful,sad,mysterious" --prompt "abstract art"

# Interactive mode
python run_phase2c_emotion_to_image.py --interactive
```

### Phase 2.D: Real-Time Data Feeds to Image Generation 🌎

```bash
# Streamlit UI (Recommended)
streamlit run src/ui/compi_phase2d_streamlit_ui.py

# Command line generation with weather data
python run_phase2d_realtime_to_image.py --prompt "cityscape" --weather --city "Tokyo"

# News-driven generation
python run_phase2d_realtime_to_image.py --prompt "abstract art" --news --category "technology"

# Multi-source generation
python run_phase2d_realtime_to_image.py --prompt "world state" --weather --news --financial

# Temporal series generation
python run_phase2d_realtime_to_image.py --prompt "evolving world" --weather --temporal "0,30,60"

# Interactive mode
python run_phase2d_realtime_to_image.py --interactive
```

### Phase 2.E: Style Reference/Example Image to AI Art 🖼️

```bash
# Streamlit UI (Recommended)
streamlit run src/ui/compi_phase2e_streamlit_ui.py

# Command line generation with reference image
python run_phase2e_refimg_to_image.py --prompt "magical forest" --reference "path/to/image.jpg" --strength 0.6

# Web URL reference
python run_phase2e_refimg_to_image.py --prompt "cyberpunk city" --reference "https://example.com/artwork.jpg"

# Batch generation with multiple variations
python run_phase2e_refimg_to_image.py --prompt "fantasy landscape" --reference "image.png" --num-images 3

# Style analysis only
python run_phase2e_refimg_to_image.py --analyze-only --reference "artwork.jpg"

# Interactive mode
python run_phase2e_refimg_to_image.py --interactive
```

## 🧪 NEW: Ultimate Multimodal Dashboard (True Fusion) 🚀

**Revolutionary upgrade with REAL processing of each input type!**

```bash
# Launch the upgraded dashboard with true multimodal fusion
python run_ultimate_multimodal_dashboard.py

# Or run directly
streamlit run src/ui/compi_ultimate_multimodal_dashboard.py --server.port 8503
```

**Key Improvements:**

- ✅ **Real Audio Analysis**: Whisper transcription + librosa features
- ✅ **Actual Data Processing**: CSV analysis + formula evaluation
- ✅ **True Emotion Analysis**: TextBlob sentiment classification
- ✅ **Live Real-time Data**: Weather/news API integration
- ✅ **Advanced References**: img2img + ControlNet processing
- ✅ **Intelligent Fusion**: Actual content processing (not static keywords)

**Access at:** `http://localhost:8503`

**See:** `ULTIMATE_MULTIMODAL_DASHBOARD_README.md` for detailed documentation.

## 🖼️ NEW: Phase 3.C Advanced Reference Integration 🚀

**Professional multi-reference control with hybrid generation modes!**

**Key Features:**

- ✅ **Role-Based Reference Assignment**: Select images for style vs structure
- ✅ **Live ControlNet Previews**: Real-time Canny/Depth preprocessing
- ✅ **Hybrid Generation Modes**: CN + IMG2IMG simultaneous processing
- ✅ **Professional Controls**: Independent strength tuning for style/structure
- ✅ **Seamless Integration**: Works with all CompI multimodal phases

**See:** `PHASE3C_ADVANCED_REFERENCE_INTEGRATION.md` for complete documentation.

## 🗂️ NEW: Phase 3.D Professional Workflow Manager 🚀

**Complete creative workflow platform with unified logging, presets, and export bundles!**

**Key Features:**

- ✅ **Unified Run Logging**: Auto-ingests from all CompI phases
- ✅ **Professional Gallery**: Advanced filtering and search
- ✅ **Preset System**: Save/load complete generation configs
- ✅ **Export Bundles**: ZIP packages with metadata and reproducibility
- ✅ **Annotation System**: Ratings, tags, and notes for workflow management

**Launch:** `python run_phase3d_workflow_manager.py` | **Access:** `http://localhost:8504`

**See:** `docs/PHASE3D_WORKFLOW_MANAGER_GUIDE.md` for complete documentation.

## ⚙️ NEW: Phase 3.E Performance, Model Management & Reliability 🚀

**Production-grade performance optimization, model switching, and intelligent reliability!**

**Key Features:**

- ✅ **Model Manager**: Dynamic SD 1.5 ↔ SDXL switching with auto-availability checking
- ✅ **LoRA Integration**: Universal LoRA loading with scale control across all models
- ✅ **Performance Controls**: xFormers, attention slicing, VAE optimizations, precision control
- ✅ **VRAM Monitoring**: Real-time GPU memory usage tracking and alerts
- ✅ **Reliability Engine**: OOM-safe auto-retry with intelligent fallbacks
- ✅ **Batch Processing**: Seed-controlled batch generation with memory management
- ✅ **Upscaler Integration**: Optional 2x latent upscaling for enhanced quality

**Launch:** `python run_phase3e_performance_manager.py` | **Access:** `http://localhost:8505`

**See:** `docs/PHASE3E_PERFORMANCE_GUIDE.md` for complete documentation.

## 🧪 ULTIMATE: Phase 3 Final Dashboard - Complete Integration! 🎉

**The ultimate CompI interface that integrates ALL Phase 3 components into one unified creative environment!**

**Complete Feature Integration:**

- ✅ **🧩 Multimodal Fusion (3.A/3.B)**: Real audio, data, emotion, real-time processing
- ✅ **🖼️ Advanced References (3.C)**: Role assignment, ControlNet, live previews
- ✅ **⚙️ Performance Management (3.E)**: Model switching, LoRA, VRAM monitoring
- ✅ **🎛️ Intelligent Generation**: Hybrid modes with automatic fallback strategies
- ✅ **🖼️ Professional Gallery (3.D)**: Filtering, rating, annotation system
- ✅ **💾 Preset Management (3.D)**: Save/load complete configurations
- ✅ **📦 Export System (3.D)**: Complete bundles with metadata and reproducibility

**Professional Workflow:**

1. **Configure multimodal inputs** (text, audio, data, emotion, real-time)
2. **Upload and assign references** (style vs structure roles)
3. **Choose model and optimize performance** (SD 1.5/SDXL, LoRA, optimizations)
4. **Generate with intelligent fusion** (automatic mode selection)
5. **Review and annotate results** (gallery with rating/tagging)
6. **Save presets and export bundles** (complete reproducibility)

**Launch:** `python run_phase3_final_dashboard.py` | **Access:** `http://localhost:8506`

**See:** `docs/PHASE3_FINAL_DASHBOARD_GUIDE.md` for complete documentation.

---

## 🎯 **CompI Project Status: COMPLETE** ✅

**CompI has achieved its ultimate vision: the world's most comprehensive and production-ready multimodal AI art generation platform!**

### **✅ All Phases Complete:**

- **✅ Phase 1**: Foundation (text-to-image, styling, evaluation, LoRA training)
- **✅ Phase 2**: Multimodal integration (audio, data, emotion, real-time, references)
- **✅ Phase 3**: Advanced features (fusion dashboard, advanced references, workflow management, performance optimization)

### **🚀 What CompI Offers:**

- **Complete Creative Platform**: From generation to professional workflow management
- **Production-Grade Reliability**: Robust error handling and performance optimization
- **Professional Tools**: Industry-standard features for serious creative and commercial work
- **Universal Compatibility**: Works across different hardware configurations
- **Extensible Foundation**: Ready for future enhancements and integrations

**CompI is now the ultimate multimodal AI art generation platform - ready for professional creative work!** 🎨✨

## 🎯 Core Features

- **Text Analysis**: Emotion detection and sentiment analysis
- **Image Generation**: Stable Diffusion integration with advanced conditioning
- **Audio Processing**: Music and sound analysis with Whisper integration
- **Data Processing**: CSV analysis and mathematical formula evaluation
- **Emotion Processing**: Preset emotions, custom emotions, emoji, and contextual analysis
- **Real-Time Integration**: Live weather, news, and financial data feeds
- **Style Reference**: Upload/URL image guidance with AI-powered style analysis
- **Multi-modal Fusion**: Combining text, audio, data, emotions, real-time feeds, and visual references
- **Pattern Recognition**: Automatic detection of trends, correlations, and seasonality
- **Poetic Interpretation**: Converting data patterns and emotions into artistic language
- **Color Psychology**: Emotion-based color palette generation and conditioning
- **Temporal Awareness**: Time-sensitive data processing and evolution tracking

## 🔧 Tech Stack

- **Deep Learning**: PyTorch, Transformers, Diffusers
- **Audio**: librosa, soundfile
- **UI**: Streamlit/Gradio
- **Data**: pandas, numpy
- **Visualization**: matplotlib, seaborn

## 📝 Usage

Coming soon - basic usage examples and API documentation.

## 🤝 Contributing

This is a development project. Feel free to experiment and extend functionality.

## 📄 License

MIT License - see LICENSE file for details.

# Project_CompI
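
The README's step 3 runs `src/test_setup.py`, which is not included in this diff. For readers without that script, a rough stand-in using only standard PyTorch calls to confirm that the CUDA install from step 2 took effect:

```python
# Sketch: verify the PyTorch/CUDA install from the setup instructions.
import torch

print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
else:
    print("Running on CPU; generation will be slower.")
```
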
docs/PHASE1D_EVALUATION_GUIDE.md
ADDED

# CompI Phase 1.D: Baseline Output Quality Evaluation Guide

## 🎯 Overview

Phase 1.D provides comprehensive tools for systematically evaluating the quality, coherence, and consistency of images generated by your CompI pipeline. This phase is crucial for understanding your model's performance and tracking improvements over time.

## 🛠️ Tools Provided

### 1. **Streamlit Web Interface** (`compi_phase1d_evaluate_quality.py`)

- **Interactive visual evaluation** with side-by-side image and metadata display
- **Multi-criteria scoring system** (1-5 stars) for comprehensive assessment
- **Objective metrics calculation** (perceptual hashes, file size, dimensions)
- **Persistent evaluation logging** with CSV export
- **Batch evaluation capabilities** for efficient processing

### 2. **Command-Line Interface** (`compi_phase1d_cli_evaluation.py`)

- **Batch processing** for automated evaluation workflows
- **Statistical analysis** and performance summaries
- **Detailed report generation** with recommendations
- **Filtering and listing** capabilities for organized review

### 3. **Convenient Launcher** (`run_evaluation.py`)

- **One-click startup** for the web interface
- **Automatic environment checking** and error handling

## 📊 Evaluation Criteria

The evaluation system uses **5 comprehensive criteria**, each scored on a **1-5 scale**:

### 1. **Prompt Adherence**

- How well does the image match the text prompt?
- Scale: 1=Poor match → 5=Perfect match

### 2. **Style Consistency**

- How well does the image reflect the intended artistic style?
- Scale: 1=Style not evident → 5=Style perfectly executed

### 3. **Mood & Atmosphere**

- How well does the image convey the intended mood/atmosphere?
- Scale: 1=Wrong mood → 5=Perfect mood

### 4. **Technical Quality**

- Overall image quality (resolution, composition, artifacts)
- Scale: 1=Poor quality → 5=Excellent quality

### 5. **Creative Appeal**

- Subjective aesthetic and creative value
- Scale: 1=Unappealing → 5=Highly appealing

## 🚀 Quick Start

### Web Interface (Recommended for Manual Review)

```bash
# Install required dependency
pip install imagehash

# Launch the evaluation interface
python run_evaluation.py

# Or run directly
streamlit run src/generators/compi_phase1d_evaluate_quality.py
```

The web interface will open at `http://localhost:8501` with:

- **Single Image Review**: Detailed evaluation of individual images
- **Batch Evaluation**: Quick scoring for multiple images
- **Summary Analysis**: Statistics and performance insights

### Command-Line Interface (For Automation)

```bash
# Analyze existing evaluations
python src/generators/compi_phase1d_cli_evaluation.py --analyze

# List unevaluated images
python src/generators/compi_phase1d_cli_evaluation.py --list-unevaluated

# Batch score all unevaluated images (prompt, style, mood, quality, appeal)
python src/generators/compi_phase1d_cli_evaluation.py --batch-score 4 3 4 4 3 --notes "Initial baseline evaluation"

# Generate detailed report
python src/generators/compi_phase1d_cli_evaluation.py --report --output evaluation_report.txt
```

## 📁 File Structure

```
outputs/
├── [generated images].png             # Your CompI-generated images
├── evaluation_log.csv                 # Detailed evaluation data
└── evaluation_summary.json            # Summary statistics

src/generators/
├── compi_phase1d_evaluate_quality.py  # Main Streamlit interface
└── compi_phase1d_cli_evaluation.py    # Command-line tools

run_evaluation.py                      # Convenient launcher
```

## 📈 Understanding Your Data

### Evaluation Log (`outputs/evaluation_log.csv`)

Contains detailed records with columns:

- **Image metadata**: filename, prompt, style, mood, seed, variation
- **Evaluation scores**: All 5 criteria scores (1-5)
- **Objective metrics**: dimensions, file size, perceptual hashes
- **Evaluation metadata**: timestamp, notes, evaluator comments

### Key Metrics to Track

1. **Overall Score Trends**: Are your images improving over time?
2. **Criteria Performance**: Which aspects (prompt match, style, etc.) need work?
3. **Style/Mood Effectiveness**: Which combinations work best?
4. **Consistency**: Are similar prompts producing consistent results?

## 🎯 Best Practices

### Systematic Evaluation Workflow

1. **Generate a batch** of images using your CompI tools
2. **Evaluate systematically** using consistent criteria
3. **Analyze patterns** in the data to identify strengths/weaknesses
4. **Adjust generation parameters** based on insights
5. **Re-evaluate** to measure improvements

### Evaluation Tips

- **Be consistent** in your scoring criteria across sessions
- **Use notes** to capture specific observations and issues
- **Evaluate in batches** of similar style/mood for better comparison
- **Track changes** over time as you refine your generation process

### Interpreting Scores

- **4.0+ average**: Excellent performance, ready for production use
- **3.0-3.9 average**: Good performance, minor improvements needed
- **2.0-2.9 average**: Moderate performance, significant improvements needed
- **Below 2.0**: Poor performance, major adjustments required

## 🔧 Advanced Usage

### Filtering and Analysis

```bash
# Analyze only specific styles
python src/generators/compi_phase1d_cli_evaluation.py --analyze --style "anime"

# List images by mood
python src/generators/compi_phase1d_cli_evaluation.py --list-all --mood "dramatic"

# Generate style-specific report
python src/generators/compi_phase1d_cli_evaluation.py --report --style "oil painting" --output oil_painting_analysis.txt
```

### Custom Evaluation Workflows

The evaluation tools are designed to be flexible:

- **Modify criteria** by editing `EVALUATION_CRITERIA` in the source
- **Add custom metrics** by extending the `get_image_metrics()` function
- **Integrate with other tools** using the CSV export functionality

## 📊 Sample Analysis Output

```
📊 CompI Phase 1.D - Evaluation Analysis
==================================================
Total Evaluated Images: 25

📈 Score Statistics:
  Prompt Adherence  : 3.84 ± 0.75 (range: 2-5)
  Style Consistency : 3.52 ± 0.87 (range: 2-5)
  Mood & Atmosphere : 3.68 ± 0.69 (range: 2-5)
  Technical Quality : 4.12 ± 0.60 (range: 3-5)
  Creative Appeal   : 3.76 ± 0.83 (range: 2-5)

🎨 Top Performing Styles (by Prompt Match):
  anime        : 4.20
  oil painting : 3.90
  digital art  : 3.75
```

## 🚀 Next Steps

After completing Phase 1.D evaluation:

1. **Identify improvement areas** from your evaluation data
2. **Experiment with parameter adjustments** for low-scoring criteria
3. **Document successful combinations** for future use
4. **Consider Phase 2** development based on baseline performance
5. **Set up regular evaluation cycles** for continuous improvement

## 🤝 Integration with Other Phases

Phase 1.D evaluation data can inform:

- **Phase 1.A/1.B parameter tuning**: Adjust generation settings based on quality scores
- **Phase 1.C UI improvements**: Highlight best-performing style/mood combinations
- **Future phases**: Use baseline metrics to measure advanced feature improvements

---

**Happy Evaluating! 🎨📊**

The systematic evaluation provided by Phase 1.D is essential for understanding and improving your CompI system's performance. Use these tools regularly to maintain high-quality output and track your progress over time.
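
The guide's CLI already aggregates scores, but `outputs/evaluation_log.csv` also lends itself to ad-hoc analysis. A pandas sketch along the lines of the sample output above; the column names are assumptions inferred from the five criteria, not confirmed by this diff:

```python
# Sketch: summarize evaluation scores from the CSV log with pandas.
# Column names are assumed from the guide's five criteria.
import pandas as pd

df = pd.read_csv("outputs/evaluation_log.csv")
criteria = [
    "prompt_adherence", "style_consistency",
    "mood_atmosphere", "technical_quality", "creative_appeal",
]
for col in criteria:
    if col in df.columns:  # guard against differently named columns
        print(f"{col:20s}: {df[col].mean():.2f} ± {df[col].std():.2f}")
```
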
docs/PHASE1E_LORA_GUIDE.md
ADDED

# CompI Phase 1.E: Personal Style Fine-tuning with LoRA

## 🎯 Overview

Phase 1.E enables you to train **personalized artistic styles** using LoRA (Low-Rank Adaptation) fine-tuning on Stable Diffusion. This allows you to create AI art that reflects your unique artistic vision or mimics specific artistic styles.

**LoRA Benefits:**

- ✅ **Lightweight**: Only trains a small adapter (~10-100MB vs full model ~4GB)
- ✅ **Fast**: Training takes minutes to hours instead of days
- ✅ **Flexible**: Can be combined with different base models
- ✅ **Efficient**: Runs on consumer GPUs (8GB+ VRAM recommended)

## 🛠️ Tools Provided

### 1. **Dataset Preparation** (`compi_phase1e_dataset_prep.py`)

- Organize and validate your style images
- Generate appropriate training captions
- Resize and format images for optimal training
- Create train/validation splits

### 2. **LoRA Training** (`compi_phase1e_lora_training.py`)

- Full LoRA fine-tuning pipeline with PEFT integration
- Configurable training parameters and monitoring
- Automatic checkpoint saving and validation
- Memory-efficient training with gradient checkpointing

### 3. **Style Generation** (`compi_phase1e_style_generation.py`)

- Generate images using your trained LoRA styles
- Interactive and batch generation modes
- Adjustable style strength and parameters
- Integration with existing CompI pipeline

### 4. **Style Management** (`compi_phase1e_style_manager.py`)

- Manage multiple trained LoRA styles
- Clean up old checkpoints and organize models
- Export style information and analytics
- Switch between different personal styles

## 🚀 Quick Start Guide

### Step 1: Install Dependencies

```bash
# Install LoRA training dependencies
pip install peft datasets bitsandbytes

# Verify installation
python -c "import peft, datasets; print('✅ Dependencies installed')"
```

### Step 2: Prepare Your Style Dataset

```bash
# Organize your style images in a folder
mkdir my_artwork
# Copy 10-50 images of your artistic style to my_artwork/

# Prepare dataset for training
python src/generators/compi_phase1e_dataset_prep.py \
    --input-dir my_artwork \
    --style-name "my_art_style" \
    --trigger-word "myart"
```

**Dataset Requirements:**

- **10-50 images** (more is better, but 20+ is usually sufficient)
- **Consistent style** across all images
- **512x512 pixels** recommended (will be auto-resized)
- **High quality** images without watermarks or text

### Step 3: Train Your LoRA Style

```bash
# Start LoRA training
python run_lora_training.py \
    --dataset-dir datasets/my_art_style \
    --epochs 100 \
    --learning-rate 1e-4

# Or with custom settings
python run_lora_training.py \
    --dataset-dir datasets/my_art_style \
    --epochs 200 \
    --batch-size 2 \
    --lora-rank 8 \
    --lora-alpha 32
```

**Training Tips:**

- **Start with 100 epochs** for initial testing
- **Increase to 200-500 epochs** for stronger style learning
- **Monitor validation loss** to avoid overfitting
- **Use gradient checkpointing** if you run out of memory

### Step 4: Generate with Your Style

```bash
# Generate images with your trained style
python run_style_generation.py \
    --lora-path lora_models/my_art_style/checkpoint-1000 \
    "a cat in myart style" \
    --variations 4

# Interactive mode
python run_style_generation.py \
    --lora-path lora_models/my_art_style/checkpoint-1000 \
    --interactive
```

## 📊 Advanced Usage

### Training Configuration

```bash
# High-quality training (slower but better results)
python run_lora_training.py \
    --dataset-dir datasets/my_style \
    --epochs 300 \
    --learning-rate 5e-5 \
    --lora-rank 16 \
    --lora-alpha 32 \
    --batch-size 1 \
    --gradient-checkpointing

# Fast training (quicker results for testing)
python run_lora_training.py \
    --dataset-dir datasets/my_style \
    --epochs 50 \
    --learning-rate 2e-4 \
    --lora-rank 4 \
    --lora-alpha 16
```

### Style Management

```bash
# List all trained styles
python src/generators/compi_phase1e_style_manager.py --list

# Get detailed info about a style
python src/generators/compi_phase1e_style_manager.py --info my_art_style

# Clean up old checkpoints (keep only 3 most recent)
python src/generators/compi_phase1e_style_manager.py --cleanup my_art_style --keep 3

# Export styles information to CSV
python src/generators/compi_phase1e_style_manager.py --export my_styles_report.csv
```

### Generation Parameters

```bash
# Adjust style strength
python run_style_generation.py \
    --lora-path lora_models/my_style/checkpoint-1000 \
    --lora-scale 0.8 \
    "portrait in myart style"

# High-quality generation
python run_style_generation.py \
    --lora-path lora_models/my_style/checkpoint-1000 \
    --steps 50 \
    --guidance 8.0 \
    --width 768 \
    --height 768 \
    "landscape in myart style"
```

## 🎨 Best Practices

### Dataset Preparation

1. **Consistent Style**: All images should represent the same artistic style
2. **Quality over Quantity**: 20 high-quality images > 100 low-quality ones
3. **Diverse Subjects**: Include various subjects (people, objects, landscapes)
4. **Clean Images**: Remove watermarks, text, and irrelevant elements
5. **Proper Captions**: Use consistent trigger words in captions

### Training Tips

1. **Start Small**: Begin with 50-100 epochs to test
2. **Monitor Progress**: Check validation loss and sample generations
3. **Adjust Learning Rate**: Lower if loss oscillates, higher if learning is slow
4. **Use Checkpoints**: Save frequently to avoid losing progress
5. **Experiment with LoRA Rank**: Higher rank = more capacity but slower training

### Generation Guidelines

1. **Include Trigger Words**: Always use your trigger word in prompts
2. **Adjust Style Strength**: Use `--lora-scale` to control style intensity
3. **Combine with Techniques**: Mix with existing CompI style/mood systems
4. **Iterate and Refine**: Generate multiple variations and select best results

## 🔧 Troubleshooting

### Common Issues

**Out of Memory Error:**

```bash
# Reduce batch size and enable gradient checkpointing
python run_lora_training.py \
    --dataset-dir datasets/my_style \
    --batch-size 1 \
    --gradient-checkpointing \
    --mixed-precision
```

**Style Not Learning:**

- Increase epochs (try 200-500)
- Check dataset consistency
- Increase LoRA rank (try 8 or 16)
- Lower learning rate (try 5e-5)

**Generated Images Don't Match Style:**

- Include trigger word in prompts
- Increase LoRA scale (try 1.2-1.5)
- Train for more epochs
- Check dataset quality

**Training Too Slow:**

- Reduce image resolution to 512x512
- Use mixed precision training
- Enable gradient checkpointing
- Reduce LoRA rank to 4

## 📁 File Structure

```
Project CompI/
├── datasets/                              # Prepared training datasets
│   └── my_art_style/
│       ├── train/                         # Training images
│       ├── validation/                    # Validation images
│       ├── train_captions.txt             # Training captions
│       └── dataset_info.json              # Dataset metadata
├── lora_models/                           # Trained LoRA models
│   └── my_art_style/
│       ├── checkpoint-100/                # Training checkpoints
│       ├── checkpoint-200/
│       └── training_info.json             # Training metadata
├── src/generators/
│   ├── compi_phase1e_dataset_prep.py      # Dataset preparation
│   ├── compi_phase1e_lora_training.py     # LoRA training
│   ├── compi_phase1e_style_generation.py  # Style generation
│   └── compi_phase1e_style_manager.py     # Style management
├── run_lora_training.py                   # Training launcher
└── run_style_generation.py                # Generation launcher
```

## 🎯 Integration with CompI

Phase 1.E integrates seamlessly with existing CompI tools:

1. **Combine with Phase 1.B**: Use LoRA styles alongside predefined styles
2. **Evaluate with Phase 1.D**: Assess your LoRA-generated images systematically
3. **UI Integration**: Add LoRA styles to Streamlit/Gradio interfaces
4. **Batch Processing**: Generate multiple variations for evaluation

## 🚀 Next Steps

After mastering Phase 1.E:

1. **Experiment with Multiple Styles**: Train different LoRA adapters for various artistic approaches
2. **Style Mixing**: Combine multiple LoRA styles for unique effects
3. **Advanced Techniques**: Explore Textual Inversion, DreamBooth, or ControlNet integration
4. **Community Sharing**: Share your trained styles with the CompI community
5. **Phase 2 Preparation**: Use personal styles as foundation for multimodal integration

---

**Happy Style Training! 🎨✨**

Phase 1.E opens up endless possibilities for personalized AI art generation. With LoRA fine-tuning, you can teach the AI to understand and replicate your unique artistic vision, creating truly personalized creative content.
docs/PHASE1_USAGE.md
ADDED
@@ -0,0 +1,454 @@
# CompI Phase 1: Text-to-Image Generation Usage Guide

This guide covers the Phase 1 implementation of CompI's text-to-image generation capabilities using Stable Diffusion.

## 🚀 Quick Start

### Basic Usage

```bash
# Simple generation with interactive prompt
python run_basic_generation.py

# Generate from command line
python run_basic_generation.py "A magical forest, digital art, highly detailed"

# Or run directly from src/generators/
python src/generators/compi_phase1_text2image.py "A magical forest"
```

### Advanced Usage

```bash
# Advanced script with more options
python run_advanced_generation.py "cyberpunk city at sunset" --negative "blurry, low quality" --steps 50 --batch 3

# Interactive mode for experimentation
python run_advanced_generation.py --interactive

# Or run directly from src/generators/
python src/generators/compi_phase1_advanced.py --interactive
```

## 📋 Available Scripts

### 1. `compi_phase1_text2image.py` - Basic Implementation

**Features:**

- Simple, standalone text-to-image generation
- Automatic GPU/CPU detection
- Command line or interactive prompts
- Automatic output saving with descriptive filenames
- Comprehensive logging

**Usage:**

```bash
python compi_phase1_text2image.py [prompt]
```

### 2. `compi_phase1_advanced.py` - Enhanced Implementation

**Features:**

- Batch generation (multiple images)
- Negative prompts (what to avoid)
- Customizable parameters (steps, guidance, dimensions)
- Interactive mode for experimentation
- Metadata saving (JSON files with generation parameters)
- Multiple model support

**Command Line Options:**

```bash
python compi_phase1_advanced.py [OPTIONS] [PROMPT]

Options:
  --negative, -n TEXT     Negative prompt (what to avoid)
  --steps, -s INTEGER     Number of inference steps (default: 30)
  --guidance, -g FLOAT    Guidance scale (default: 7.5)
  --seed INTEGER          Random seed for reproducibility
  --batch, -b INTEGER     Number of images to generate
  --width, -w INTEGER     Image width (default: 512)
  --height INTEGER        Image height (default: 512)
  --model, -m TEXT        Model to use (default: runwayml/stable-diffusion-v1-5)
  --output, -o TEXT       Output directory (default: outputs)
  --interactive, -i       Interactive mode
```

## 🎨 Example Commands

### Basic Examples

```bash
# Simple landscape
python run_basic_generation.py "serene mountain lake, golden hour, photorealistic"

# Digital art style
python run_basic_generation.py "futuristic robot, neon lights, cyberpunk style, digital art"
```

### Advanced Examples

```bash
# High-quality generation with negative prompts
python run_advanced_generation.py "beautiful portrait of a woman, oil painting style" \
    --negative "blurry, distorted, low quality, bad anatomy" \
    --steps 50 --guidance 8.0

# Batch generation with fixed seed
python run_advanced_generation.py "abstract geometric patterns, colorful" \
    --batch 5 --seed 12345 --steps 40

# Custom dimensions for landscape
python run_advanced_generation.py "panoramic view of alien landscape" \
    --width 768 --height 512 --steps 35

# Interactive experimentation
python run_advanced_generation.py --interactive
```

## 📁 Output Structure

Generated images are saved in the `outputs/` directory with descriptive filenames:

```
outputs/
├── magical_forest_digital_art_20241225_143022_seed42.png
├── magical_forest_digital_art_20241225_143022_seed42_metadata.json
├── cyberpunk_city_sunset_20241225_143156_seed1337.png
└── cyberpunk_city_sunset_20241225_143156_seed1337_metadata.json
```

### Metadata Files

Each generated image (in advanced mode) includes a JSON metadata file with:

- Original prompt and negative prompt
- Generation parameters (steps, guidance, seed)
- Image dimensions and model used
- Timestamp and batch information
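As an illustration, a metadata file might look like the following; the field names here are indicative of the information listed above, not an exact schema:

```json
{
  "prompt": "magical forest, digital art",
  "negative_prompt": "blurry, low quality",
  "steps": 30,
  "guidance_scale": 7.5,
  "seed": 42,
  "width": 512,
  "height": 512,
  "model": "runwayml/stable-diffusion-v1-5",
  "timestamp": "20241225_143022",
  "batch_index": 1
}
```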
## ⚙️ Configuration Tips

### For Best Quality

- Use 30-50 inference steps
- Guidance scale 7.5-12.0
- Include style descriptors ("digital art", "oil painting", "photorealistic")
- Use negative prompts to avoid unwanted elements

### For Speed

- Use 20-25 inference steps
- Lower guidance scale (6.0-7.5)
- Stick to 512x512 resolution

### For Experimentation

- Use interactive mode
- Try different seeds with the same prompt
- Experiment with guidance scale values
- Use batch generation to explore variations

## 🔧 Troubleshooting

### Common Issues

1. **CUDA out of memory**: Reduce batch size or image dimensions
2. **Slow generation**: Ensure CUDA is available and working
3. **Poor quality**: Increase steps, adjust guidance scale, improve prompts
4. **Model download fails**: Check internet connection, try again

### Performance Optimization

- The scripts automatically enable attention slicing for memory efficiency (sketched below)
- GPU detection is automatic
- Models are cached after first download
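The attention-slicing optimization comes down to a single `diffusers` call; a minimal sketch of the pattern the scripts follow (the exact wiring inside them may differ):

```python
import torch
from diffusers import StableDiffusionPipeline

# Automatic GPU/CPU detection, as the scripts do internally.
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)

# Compute attention in slices: marginally slower, but much lower peak VRAM.
pipe.enable_attention_slicing()
```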
## 🎨 Phase 1.B: Style Conditioning & Prompt Engineering

### 3. `compi_phase1b_styled_generation.py` - Style Conditioning

**Features:**

- Interactive style and mood selection from curated lists
- Intelligent prompt engineering and combination
- Multiple variations with unique seeds
- Comprehensive logging and filename organization

**Usage:**

```bash
python run_styled_generation.py [prompt]
# Or directly: python src/generators/compi_phase1b_styled_generation.py [prompt]
```

### 4. `compi_phase1b_advanced_styling.py` - Advanced Style Control

**Features:**

- 13 predefined art styles with optimized prompts and negative prompts
- 9 mood categories with atmospheric conditioning
- Quality presets (draft/standard/high)
- Command line and interactive modes
- Comprehensive metadata saving

**Command Line Options:**

```bash
python run_advanced_styling.py [OPTIONS] [PROMPT]
# Or directly: python src/generators/compi_phase1b_advanced_styling.py [OPTIONS] [PROMPT]

Options:
  --style, -s TEXT      Art style (or number from list)
  --mood, -m TEXT       Mood/atmosphere (or number from list)
  --variations, -v INT  Number of variations (default: 1)
  --quality, -q CHOICE  Quality preset [draft/standard/high]
  --negative, -n TEXT   Negative prompt
  --interactive, -i     Interactive mode
  --list-styles         List available styles and exit
  --list-moods          List available moods and exit
```

### Style Conditioning Examples

**Basic Style Selection:**

```bash
# Interactive mode with guided selection
python run_styled_generation.py

# Command line with style selection
python run_advanced_styling.py "mountain landscape" --style cyberpunk --mood dramatic
```

**Advanced Style Control:**

```bash
# High quality with multiple variations
python run_advanced_styling.py "portrait of a wizard" \
    --style "oil painting" --mood "mysterious" \
    --quality high --variations 3 \
    --negative "blurry, distorted, amateur"

# List available options
python run_advanced_styling.py --list-styles
python run_advanced_styling.py --list-moods
```

**Available Styles:**

- digital art, oil painting, watercolor, cyberpunk
- impressionist, concept art, anime, photorealistic
- minimalist, surrealism, pixel art, steampunk, 3d render

**Available Moods:**

- dreamy, dark, peaceful, vibrant, melancholic
- mysterious, whimsical, dramatic, retro
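Conceptually, style conditioning appends the chosen style and mood phrases to your base prompt before generation. A simplified, hypothetical sketch of the idea (the real scripts also add style-specific negative prompts and quality modifiers):

```python
def build_styled_prompt(prompt: str, style: str = "", mood: str = "") -> str:
    """Combine a base prompt with style and mood descriptors."""
    parts = [prompt]
    if style:
        parts.append(f"{style} style")
    if mood:
        parts.append(f"{mood} mood")
    return ", ".join(parts)

# "mountain landscape" -> "mountain landscape, cyberpunk style, dramatic mood"
print(build_styled_prompt("mountain landscape", "cyberpunk", "dramatic"))
```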
## 🖥️ Phase 1.C: Interactive Web UI

### 5. `compi_phase1c_streamlit_ui.py` - Streamlit Web Interface

**Features:**

- Complete web-based interface for text-to-image generation
- Interactive style and mood selection with custom options
- Advanced settings (steps, guidance, dimensions, negative prompts)
- Real-time image generation and display
- Progress tracking and generation logs
- Automatic saving with comprehensive metadata

**Usage:**

```bash
python run_ui.py
# Or directly: streamlit run src/ui/compi_phase1c_streamlit_ui.py
```

### 6. `compi_phase1c_gradio_ui.py` - Gradio Web Interface

**Features:**

- Alternative web interface built with the Gradio framework
- Gallery view for multiple image variations
- Collapsible advanced settings
- Real-time generation logs
- Mobile-friendly responsive design

**Usage:**

```bash
python run_gradio_ui.py
# Or directly: python src/ui/compi_phase1c_gradio_ui.py
```

## 📊 Phase 1.D: Quality Evaluation Tools

### 7. `compi_phase1d_evaluate_quality.py` - Comprehensive Evaluation Interface

**Features:**

- Systematic image quality assessment with a 5-criteria scoring system
- Interactive Streamlit web interface for detailed evaluation
- Objective metrics calculation (perceptual hashes, dimensions, file size)
- Batch evaluation capabilities for efficient processing
- Comprehensive logging and CSV export for trend analysis
- Summary analytics with performance insights and recommendations

**Usage:**

```bash
python run_evaluation.py
# Or directly: streamlit run src/generators/compi_phase1d_evaluate_quality.py
```

### 8. `compi_phase1d_cli_evaluation.py` - Command-Line Evaluation Tools

**Features:**

- Batch evaluation and analysis from the command line
- Statistical summaries and performance reports
- Filtering by style, mood, and evaluation status
- Automated scoring for large image sets
- Detailed report generation with recommendations

**Command Line Options:**

```bash
python src/generators/compi_phase1d_cli_evaluation.py [OPTIONS]

Options:
  --analyze                Display evaluation summary and statistics
  --report                 Generate detailed evaluation report
  --batch-score P S M Q A  Batch score images (1-5 for each criterion)
  --list-all               List all images with evaluation status
  --list-evaluated         List only evaluated images
  --list-unevaluated       List only unevaluated images
  --style TEXT             Filter by style
  --mood TEXT              Filter by mood
  --notes TEXT             Notes for batch evaluation
  --output FILE            Output file for reports
```

## 🎨 Phase 1.E: Personal Style Fine-tuning (LoRA)

### 9. `compi_phase1e_dataset_prep.py` - Dataset Preparation for LoRA Training

**Features:**

- Organize and validate personal style images for training
- Generate appropriate training captions with trigger words
- Resize and format images for optimal LoRA training
- Create train/validation splits with metadata tracking
- Support for multiple image formats and quality validation

**Usage:**

```bash
python src/generators/compi_phase1e_dataset_prep.py --input-dir my_artwork --style-name "my_art_style"
# Or via wrapper: python run_dataset_prep.py --input-dir my_artwork --style-name "my_art_style"
```

### 10. `compi_phase1e_lora_training.py` - LoRA Fine-tuning Engine

**Features:**

- Full LoRA (Low-Rank Adaptation) fine-tuning pipeline
- Memory-efficient training with gradient checkpointing
- Configurable LoRA parameters (rank, alpha, learning rate)
- Automatic checkpoint saving and validation monitoring
- Integration with the PEFT library for optimal performance

**Command Line Options:**

```bash
python run_lora_training.py [OPTIONS] --dataset-dir DATASET_DIR

Options:
  --dataset-dir DIR         Required: Prepared dataset directory
  --epochs INT              Number of training epochs (default: 100)
  --learning-rate FLOAT     Learning rate (default: 1e-4)
  --lora-rank INT           LoRA rank (default: 4)
  --lora-alpha INT          LoRA alpha (default: 32)
  --batch-size INT          Training batch size (default: 1)
  --save-steps INT          Save checkpoint every N steps
  --gradient-checkpointing  Enable gradient checkpointing for memory efficiency
  --mixed-precision         Use mixed precision training
```

### 11. `compi_phase1e_style_generation.py` - Personal Style Generation

**Features:**

- Generate images using trained LoRA personal styles
- Adjustable style strength and generation parameters
- Interactive and batch generation modes
- Integration with the existing CompI pipeline and metadata
- Support for multiple LoRA styles and model switching

**Usage:**

```bash
python run_style_generation.py --lora-path lora_models/my_style/checkpoint-1000 "a cat in my_style"
# Or directly: python src/generators/compi_phase1e_style_generation.py --lora-path PATH PROMPT
```

### 12. `compi_phase1e_style_manager.py` - LoRA Style Management

**Features:**

- Manage multiple trained LoRA styles and checkpoints
- Clean up old checkpoints and organize model storage
- Export style information and training analytics
- Style database with automatic scanning and metadata
- Batch operations for style maintenance and organization

**Command Line Options:**

```bash
python src/generators/compi_phase1e_style_manager.py [OPTIONS]

Options:
  --list                List all available LoRA styles
  --info STYLE_NAME     Show detailed information about a style
  --refresh             Refresh the styles database
  --cleanup STYLE_NAME  Clean up old checkpoints for a style
  --export OUTPUT_FILE  Export style information to CSV
  --delete STYLE_NAME   Delete a LoRA style (requires --confirm)
```

### Web UI Examples

**Streamlit Interface:**

- Navigate to http://localhost:8501 after running
- Full-featured interface with sidebar settings
- Progress bars and status updates
- Expandable sections for details

**Gradio Interface:**

- Navigate to http://localhost:7860 after running
- Gallery-style image display
- Compact, mobile-friendly design
- Real-time generation feedback

## 🎯 Next Steps

Phase 1 establishes the foundation for CompI's text-to-image capabilities. Future phases will add:

- Audio input processing
- Emotion and style conditioning
- Real-time data integration
- Multimodal fusion
- Advanced UI interfaces

## 📚 Resources

- [Stable Diffusion Documentation](https://huggingface.co/docs/diffusers)
- [Prompt Engineering Guide](https://prompthero.com/stable-diffusion-prompt-guide)
- [CompI Development Plan](development.md)
docs/PHASE2A_AUDIO_TO_IMAGE_GUIDE.md
ADDED
@@ -0,0 +1,307 @@
# CompI Phase 2.A: Audio-to-Image Generation Guide

Welcome to **CompI Phase 2.A**, the next evolution in multimodal AI art generation! This phase introduces the ability to generate images influenced by audio input, combining the power of text prompts with the emotional and rhythmic qualities of sound.

## 🎵 What's New in Phase 2.A

### Core Features

- **Audio Analysis**: Extract tempo, energy, spectral features, and harmonic content from audio files
- **Audio Captioning**: Convert speech, music, and ambient sounds to descriptive text using OpenAI Whisper
- **Multimodal Fusion**: Intelligently combine text prompts with audio-derived features
- **Rich Metadata**: Comprehensive logging of audio features and generation context
- **Multiple Interfaces**: Streamlit UI, CLI, and programmatic API

### Supported Audio Formats

- MP3, WAV, FLAC, M4A, OGG
- Recommended: under 60 seconds for optimal processing speed
- Automatic resampling to 16kHz for analysis

## 🚀 Quick Start

### 1. Install Dependencies

First, ensure you have the Phase 2.A dependencies:

```bash
pip install openai-whisper
```

All other dependencies should already be installed from Phase 1.

### 2. Streamlit UI (Recommended for Beginners)

Launch the interactive web interface:

```bash
streamlit run src/ui/compi_phase2a_streamlit_ui.py
```

Features:

- 🎵 Audio upload and playback
- 📊 Real-time audio analysis visualization
- 🎨 Interactive generation controls
- 📝 Enhanced prompt preview
- 🖼️ Instant results display

### 3. Command Line Interface

For power users and automation:

```bash
# Basic usage
python run_phase2a_audio_to_image.py --prompt "mystical forest" --audio "music.mp3"

# With style and mood
python run_phase2a_audio_to_image.py \
    --prompt "cyberpunk city" \
    --style "digital art" \
    --mood "neon, futuristic" \
    --audio "electronic.wav"

# Multiple variations
python run_phase2a_audio_to_image.py \
    --prompt "abstract art" \
    --audio "ambient.flac" \
    --num-images 3

# Interactive mode
python run_phase2a_audio_to_image.py --interactive
```

### 4. Programmatic Usage

```python
from src.generators.compi_phase2a_audio_to_image import CompIPhase2AAudioToImage

# Initialize generator
generator = CompIPhase2AAudioToImage()

# Generate image with audio conditioning
results = generator.generate_image(
    text_prompt="A serene mountain landscape",
    style="impressionist",
    mood="peaceful, contemplative",
    audio_path="nature_sounds.wav",
    num_images=2
)

# Access results
for result in results:
    print(f"Generated: {result['filename']}")
    result['image'].show()  # Display image
```

## 🎨 How Audio Influences Art

### Audio Feature Extraction

CompI Phase 2.A analyzes multiple aspects of your audio (a code sketch follows the list):

1. **Tempo**: Beats per minute → influences rhythm and energy descriptors
2. **Energy (RMS)**: Overall loudness → affects intensity and power descriptors
3. **Zero Crossing Rate**: Rhythmic content → adds percussive/smooth qualities
4. **Spectral Centroid**: Brightness → influences warm/bright color palettes
5. **MFCC**: Timbre characteristics → affects texture and style
6. **Chroma**: Harmonic content → influences mood and atmosphere
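As a rough sketch of how such features can be pulled out with `librosa` (the library CompI's audio utilities build on); the exact feature set and parameters in `src/utils/audio_utils.py` may differ:

```python
import librosa
import numpy as np

# Load and resample the audio (Phase 2.A analyzes at 16 kHz).
y, sr = librosa.load("music.mp3", sr=16000)

tempo, _ = librosa.beat.beat_track(y=y, sr=sr)                 # 1. tempo (BPM)
tempo = float(np.atleast_1d(tempo)[0])                         # scalar or 1-element array
energy = float(np.mean(librosa.feature.rms(y=y)))              # 2. RMS energy
zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))    # 3. zero crossing rate
brightness = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))  # 4.
mfcc = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13), axis=1)         # 5. timbre
chroma = np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1)           # 6. harmony

print(f"tempo={tempo:.0f} BPM, energy={energy:.3f}, brightness={brightness:.0f} Hz")
```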
### Intelligent Prompt Fusion

The system automatically enhances your text prompt based on audio analysis:

**Original Prompt**: "A mystical forest"
**Audio**: Slow, ambient music with low energy
**Enhanced Prompt**: "A mystical forest, slow and contemplative, gentle and subtle, warm and deep"

### Audio Captioning

Using OpenAI Whisper, the system can describe what it "hears" (the core call is sketched below):

- **Speech**: Transcribes spoken words and incorporates their meaning
- **Music**: Identifies instruments, genres, and emotional qualities
- **Ambient**: Describes environmental sounds and atmospheres
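At its core, the captioning step is a single transcription call to the `openai-whisper` package, roughly:

```python
import whisper

# Model sizes: tiny, base, small, medium, large (larger = slower but more accurate).
model = whisper.load_model("base")

# Transcribes speech; for music or ambient audio the text is a rough impression at best.
result = model.transcribe("my_audio.wav")
print(result["text"])
```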
## 📊 Understanding Audio Analysis

### Tempo Classifications

- **Very Slow** (< 60 BPM): Meditative, ethereal qualities
- **Slow** (60-90 BPM): Contemplative, peaceful atmospheres
- **Moderate** (90-120 BPM): Balanced, natural rhythms
- **Fast** (120-140 BPM): Energetic, dynamic compositions
- **Very Fast** (> 140 BPM): Intense, high-energy visuals

### Energy Levels

- **Low Energy** (< 0.02): Subtle, gentle, minimalist styles
- **Medium Energy** (0.02-0.05): Balanced, harmonious compositions
- **High Energy** (> 0.05): Vibrant, powerful, dramatic visuals

### Spectral Characteristics

- **Bright** (high spectral centroid): Light colors, sharp details
- **Dark** (low spectral centroid): Deep colors, soft textures
- **Percussive** (high ZCR): Rhythmic patterns, geometric shapes
- **Smooth** (low ZCR): Flowing forms, organic shapes

## 🎯 Best Practices

### Audio Selection

1. **Quality Matters**: Use clear, well-recorded audio for best results
2. **Length**: 10-60 seconds is optimal for processing speed
3. **Variety**: Experiment with different genres and sound types
4. **Context**: Choose audio that complements your text prompt

### Prompt Writing

1. **Be Descriptive**: Rich text prompts work better with audio conditioning
2. **Leave Room**: Let audio features add nuance to your base concept
3. **Experiment**: Try the same prompt with different audio files
4. **Balance**: Don't over-specify if you want audio to have strong influence

### Generation Settings

1. **Steps**: 30-50 steps for high quality (20 for quick tests)
2. **Guidance**: 7.5 is balanced (lower for more audio influence)
3. **Variations**: Generate multiple images to see different interpretations
4. **Seeds**: Save the seeds of favorite results for consistency

## 🔧 Advanced Features

### Batch Processing

Process multiple audio files with the same prompt:

```bash
python run_phase2a_audio_to_image.py \
    --prompt "abstract expressionism" \
    --audio-dir "./music_collection/" \
    --batch
```

### Custom Audio Analysis

```python
from src.utils.audio_utils import AudioProcessor, MultimodalPromptFusion

# Analyze audio separately
processor = AudioProcessor()
features = processor.analyze_audio_file("my_audio.wav")

# Create custom prompt fusion
fusion = MultimodalPromptFusion()
enhanced_prompt = fusion.fuse_prompt_with_audio(
    "base prompt", "style", "mood", features, "audio caption"
)
```

### Metadata and Tracking

Every generated image includes comprehensive metadata:

- Original and enhanced prompts
- Complete audio analysis results
- Generation parameters
- Timestamps and seeds
- Audio tags and classifications

## 🎪 Example Use Cases

### 1. Music Visualization

Transform your favorite songs into visual art:

- **Classical**: Orchestral pieces → elegant, flowing compositions
- **Electronic**: Synthesized music → geometric, neon aesthetics
- **Jazz**: Improvisational music → abstract, dynamic forms
- **Ambient**: Atmospheric sounds → ethereal, dreamlike scenes

### 2. Voice-to-Art

Convert spoken content into visuals:

- **Poetry Reading**: Emotional recitation → expressive, literary art
- **Storytelling**: Narrative audio → scene illustrations
- **Meditation**: Guided meditation → peaceful, spiritual imagery
- **Lectures**: Educational content → informative, structured visuals

### 3. Environmental Soundscapes

Capture the essence of places and moments:

- **Nature Sounds**: Forest, ocean, rain → organic, natural scenes
- **Urban Audio**: City sounds, traffic → industrial, modern aesthetics
- **Historical**: Period-appropriate audio → era-specific artwork
- **Sci-Fi**: Futuristic sounds → otherworldly, technological visuals

### 4. Therapeutic Applications

Use audio-visual generation for wellness:

- **Relaxation**: Calming audio → soothing, peaceful imagery
- **Motivation**: Energetic music → inspiring, powerful visuals
- **Focus**: Concentration aids → clean, organized compositions
- **Creativity**: Experimental sounds → abstract, innovative art

## 🐛 Troubleshooting

### Common Issues

**Audio Not Loading**

- Check the file format (MP3, WAV, FLAC, M4A, OGG supported)
- Ensure the file isn't corrupted
- Try converting to WAV format

**Whisper Model Loading Fails**

- Install with: `pip install openai-whisper`
- Check available disk space (models are 100MB-1GB)
- Try a smaller model size: `--whisper-model tiny`

**Generation Too Slow**

- Use `--no-caption` to skip audio captioning
- Reduce `--steps` for faster generation
- Use a smaller Whisper model
- Process shorter audio clips

**Out of Memory**

- Use CPU mode: `--device cpu`
- Reduce image size: `--size 256x256`
- Close other applications
- Process one image at a time

### Performance Tips

1. **GPU Acceleration**: CUDA significantly speeds up generation
2. **Model Caching**: The first run downloads models (1-2GB total)
3. **Audio Preprocessing**: Shorter clips process faster
4. **Batch Processing**: More efficient for multiple files
5. **Memory Management**: Close the UI between large batches

## 🔮 What's Next?

Phase 2.A is just the beginning of CompI's multimodal journey. Coming soon:

- **Phase 2.B**: Real-time audio processing and live generation
- **Phase 2.C**: Video-to-image conditioning
- **Phase 2.D**: Multi-sensor input fusion
- **Phase 3.A**: 3D model generation from multimodal input

## 📚 Additional Resources

- [CompI Project Structure](PROJECT_STRUCTURE.md)
- [Phase 1 Usage Guide](PHASE1_USAGE.md)
- [Audio Processing Documentation](src/utils/audio_utils.py)
- Example audio files (removed in repository cleanup)

---

**Happy Creating! 🎨🎵**

_CompI Phase 2.A brings together the worlds of sound and vision, creating art that truly resonates with your audio experiences._
docs/PHASE2B_DATA_TO_IMAGE_GUIDE.md
ADDED
@@ -0,0 +1,271 @@
# CompI Phase 2.B: Data/Logic Input to Image Generation

## 🚀 Overview

Phase 2.B transforms structured data and mathematical formulas into stunning AI-generated art. This phase combines data analysis, pattern recognition, and poetic interpretation to create unique visual experiences that reflect the essence of your data.

## ✨ Key Features

### 📊 Data Processing

- **CSV Data Analysis**: Upload spreadsheets, time series, measurements, or any numeric data
- **Mathematical Formula Evaluation**: Enter Python/NumPy expressions for mathematical art
- **Pattern Recognition**: Automatic detection of trends, correlations, and seasonality
- **Statistical Analysis**: Comprehensive data profiling and feature extraction

### 🎨 Artistic Integration

- **Poetic Text Generation**: Convert data patterns into descriptive, artistic language
- **Data Visualization**: Create beautiful charts and plots from your data
- **Prompt Enhancement**: Intelligently merge data insights with your creative prompts
- **Visual Conditioning**: Use data visualizations to inspire AI art generation

### 🔧 Technical Capabilities

- **Safe Formula Execution**: Secure evaluation of mathematical expressions (see the sketch after this list)
- **Batch Processing**: Handle multiple datasets or formulas simultaneously
- **Comprehensive Metadata**: Detailed logging of all generation parameters
- **Flexible Output**: Save both generated art and data visualizations
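To give a flavor of what "safe formula execution" means here: the formula string is evaluated against a whitelisted namespace instead of the full Python environment. A simplified sketch of the idea (CompI's actual safeguards are more thorough, and the variable names are illustrative):

```python
import numpy as np

def evaluate_formula(formula: str, points: int = 200) -> np.ndarray:
    """Evaluate a NumPy expression with only whitelisted names in scope."""
    allowed = {
        "np": np,
        "x": np.linspace(0, 4 * np.pi, points),
        "t": np.linspace(0, 4 * np.pi, points),
    }
    # An empty __builtins__ blocks open(), __import__(), and friends.
    return eval(formula, {"__builtins__": {}}, allowed)

values = evaluate_formula("np.sin(x) * np.exp(-x / 10)")
print(values.shape)  # (200,)
```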
## 🛠️ Installation & Setup

### Prerequisites

Ensure you have the base CompI environment set up with all dependencies from `requirements.txt`.

### Additional Dependencies

Phase 2.B uses the existing CompI dependencies, specifically:

- `pandas>=2.0.0` - Data manipulation and analysis
- `numpy>=1.24.0` - Mathematical operations
- `matplotlib>=3.7.0` - Data visualization
- `seaborn>=0.12.0` - Statistical plotting

## 🎯 Quick Start

### 1. Launch the Streamlit Interface

```bash
# Navigate to your CompI project directory
cd "C:\Users\Aksharajsinh\Documents\augment-projects\Project CompI"

# Run the Phase 2.B interface
streamlit run src/ui/compi_phase2b_streamlit_ui.py
```

### 2. Using CSV Data

1. **Upload your CSV file** containing numeric data
2. **Enter your creative prompt** (e.g., "A flowing river of data")
3. **Set style and mood** (e.g., "abstract digital art", "serene and flowing")
4. **Click Generate** and watch your data transform into art!

### 3. Using Mathematical Formulas

1. **Enter a mathematical formula** using Python/NumPy syntax
2. **Combine it with your prompt** for artistic interpretation
3. **Generate unique mathematical art** based on your equations

## 📚 Examples

### CSV Data Examples

#### Time Series Data

```csv
date,temperature,humidity,pressure
2024-01-01,22.5,65,1013.2
2024-01-02,23.1,62,1015.8
2024-01-03,21.8,68,1012.4
...
```

**Prompt**: "Weather patterns dancing across the sky"
**Style**: "impressionist painting"
**Result**: Art inspired by temperature fluctuations and atmospheric pressure

#### Financial Data

```csv
date,price,volume,volatility
2024-01-01,100.5,1000000,0.15
2024-01-02,102.3,1200000,0.18
2024-01-03,99.8,900000,0.22
...
```

**Prompt**: "The rhythm of market forces"
**Style**: "geometric abstract"
**Result**: Visual representation of market dynamics

### Mathematical Formula Examples

#### Sine Wave with Decay

```python
np.sin(np.linspace(0, 4*np.pi, 100)) * np.exp(-np.linspace(0, 1, 100))
```

**Prompt**: "Fading echoes in a digital realm"
**Result**: Art representing diminishing oscillations

#### Spiral Pattern

```python
t = np.linspace(0, 4*np.pi, 200)
np.sin(t) * t
```

**Prompt**: "The golden ratio in nature"
**Result**: Spiral-inspired organic art

#### Complex Harmonic

```python
x = np.linspace(0, 6*np.pi, 300)
np.sin(x) + 0.5*np.cos(3*x) + 0.25*np.sin(5*x)
```

**Prompt**: "Musical harmonies visualized"
**Result**: Multi-layered wave patterns

## 🎨 Creative Workflow

### 1. Data Preparation

- **Clean your data**: Remove or handle missing values
- **Choose meaningful columns**: Focus on numeric data that tells a story
- **Consider time series**: Temporal data often creates compelling patterns

### 2. Prompt Engineering

- **Start with your data story**: What does your data represent?
- **Add artistic style**: Choose styles that complement your data's nature
- **Set the mood**: Match the emotional tone to your data's characteristics

### 3. Style Recommendations

| Data Type | Recommended Styles | Mood Suggestions |
|-----------|-------------------|------------------|
| Time Series | flowing, organic, wave-like | rhythmic, temporal, evolving |
| Statistical | geometric, structured, minimal | analytical, precise, clean |
| Financial | dynamic, angular, sharp | energetic, volatile, intense |
| Scientific | technical, detailed, precise | methodical, systematic, clear |
| Random/Chaotic | abstract, expressionist, wild | unpredictable, chaotic, free |

## 🔧 Advanced Usage

### Programmatic Access

```python
from src.generators.compi_phase2b_data_to_image import CompIPhase2BDataToImage

# Initialize generator
generator = CompIPhase2BDataToImage()

# Generate from CSV
results = generator.generate_image(
    text_prompt="Data flowing like water",
    style="fluid abstract",
    mood="serene, continuous",
    csv_path="path/to/your/data.csv",
    num_images=2
)

# Generate from formula
results = generator.generate_image(
    text_prompt="Mathematical harmony",
    style="geometric precision",
    mood="balanced, rhythmic",
    formula="np.sin(np.linspace(0, 4*np.pi, 100))",
    num_images=1
)
```

### Batch Processing

```python
# Process multiple CSV files
results = generator.batch_process_csv_files(
    csv_directory="data/experiments/",
    text_prompt="Scientific visualization",
    style="technical illustration",
    mood="precise, analytical"
)

# Process multiple formulas
formulas = [
    "np.sin(x)",
    "np.cos(x)",
    "np.tan(x/2)"
]
results = generator.batch_process_formulas(
    formulas=formulas,
    text_prompt="Trigonometric art",
    style="mathematical beauty"
)
```

## 📊 Understanding Data Features

Phase 2.B analyzes your data and extracts several key features (a code sketch follows these lists):

### Statistical Features

- **Means, Medians, Standard Deviations**: Basic statistical measures
- **Ranges and Distributions**: Data spread and shape
- **Trends**: Increasing, decreasing, stable, or volatile patterns

### Pattern Features

- **Correlations**: Relationships between different data columns
- **Seasonality**: Repeating patterns in time series data
- **Complexity Score**: Measure of data intricacy (0-1)
- **Variability Score**: Measure of data diversity (0-1)
- **Pattern Strength**: Measure of detectable patterns (0-1)
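As a rough illustration of the kind of analysis involved (the real feature extractor in `src/utils/data_utils.py` is considerably more elaborate):

```python
import pandas as pd

df = pd.read_csv("data.csv")
numeric = df.select_dtypes("number")

stats = numeric.describe()     # means, standard deviations, ranges
correlations = numeric.corr()  # pairwise relationships between columns

# A crude trend check: compare each column's first-half and second-half means.
half = len(numeric) // 2
for col in numeric.columns:
    rising = numeric[col][half:].mean() > numeric[col][:half].mean()
    print(f"{col}: {'increasing' if rising else 'decreasing'}")
```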
### Poetic Interpretation

The system converts these features into artistic language:

- **Trend descriptions**: "ascending", "flowing", "turbulent"
- **Pattern adjectives**: "intricate", "harmonious", "dynamic"
- **Artistic metaphors**: "like brushstrokes on canvas", "dancing with precision"

## 🎯 Tips for Best Results

### Data Tips

1. **Quality over quantity**: Clean, meaningful data works better than large, messy datasets
2. **Numeric focus**: Ensure your CSV has numeric columns for analysis
3. **Reasonable size**: Keep datasets under 10,000 rows for faster processing
4. **Meaningful names**: Use descriptive column names for better interpretation

### Formula Tips

1. **Use NumPy functions**: Leverage `np.sin`, `np.cos`, `np.exp`, etc.
2. **Define ranges**: Use `np.linspace()` to create smooth curves
3. **Experiment with complexity**: Combine multiple functions for richer patterns
4. **Consider scale**: Ensure your formula produces reasonable numeric ranges

### Prompt Tips

1. **Be descriptive**: Rich prompts lead to more interesting results
2. **Match your data**: Align artistic style with data characteristics
3. **Experiment**: Try different style/mood combinations
4. **Use the preview**: Check the enhanced prompt before generating

## 🔍 Troubleshooting

### Common Issues

**"Error analyzing data"**

- Check that your CSV has numeric columns
- Ensure the file is properly formatted
- Try with a smaller dataset first

**"Invalid formula"**

- Use only safe mathematical functions
- Check your NumPy syntax
- Ensure parentheses are balanced

**"Generation failed"**

- Check your GPU memory if using CUDA
- Try reducing the number of inference steps
- Ensure your prompt isn't too long

### Performance Optimization

- Use GPU acceleration when available
- Reduce image dimensions for faster generation
- Process smaller datasets for quicker analysis
- Use fewer inference steps for rapid prototyping

## 🚀 Next Steps

After mastering Phase 2.B, consider:

1. **Combining with Phase 2.A**: Use audio + data for multimodal art
2. **Creating data stories**: Build narratives around your visualizations
3. **Exploring advanced formulas**: Try complex mathematical expressions
4. **Building datasets**: Create custom data for specific artistic goals

---

**Ready to transform your data into art?** Launch the Streamlit interface and start creating! 🎨📊✨
docs/PHASE2C_EMOTION_TO_IMAGE_GUIDE.md
ADDED
@@ -0,0 +1,286 @@
# CompI Phase 2.C: Emotional/Contextual Input to Image Generation

## 🌀 Overview

Phase 2.C transforms emotions, moods, and feelings into stunning AI-generated art. This phase combines emotion detection, sentiment analysis, and contextual understanding to create artwork that resonates with your emotional state and inner feelings.

## ✨ Key Features

### 🎭 Emotion Processing

- **Preset Emotions**: Choose from 25+ carefully curated emotions
- **Custom Emotions**: Enter any emotion word or feeling
- **Emoji Support**: Use emojis to express emotions naturally
- **Descriptive Text**: Describe complex emotional states in your own words
- **Sentiment Analysis**: Automatic emotion detection from text using TextBlob

### 🎨 Artistic Integration

- **Emotion-to-Color Mapping**: Automatic color palette generation based on emotions (sketched after this list)
- **Artistic Descriptors**: Emotion-specific visual styles and atmospheres
- **Prompt Enhancement**: Intelligent fusion of emotions with creative prompts
- **Intensity Levels**: Low, medium, and high emotional intensity processing
- **Mood Modifiers**: Contextual atmosphere enhancement
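Conceptually, the mapping is a lookup from emotion to palette and visual descriptors that gets folded into the prompt. A simplified, hypothetical sketch (these particular palettes are illustrative, not CompI's actual tables):

```python
# Hypothetical emotion-to-style lookup table.
EMOTION_STYLES = {
    "joyful": {"colors": "golden yellow, warm orange", "feel": "radiant, effervescent"},
    "melancholic": {"colors": "muted blue, soft grey", "feel": "wistful, contemplative"},
    "mysterious": {"colors": "deep purple, shadow black", "feel": "enigmatic, intriguing"},
}

def emotion_prompt(prompt: str, emotion: str) -> str:
    """Fold an emotion's palette and atmosphere into the base prompt."""
    style = EMOTION_STYLES.get(emotion, {"colors": "", "feel": emotion})
    suffix = f", {style['feel']}"
    if style["colors"]:
        suffix += f", palette of {style['colors']}"
    return prompt + suffix

print(emotion_prompt("a quiet harbor at dusk", "melancholic"))
```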
### 🔧 Technical Capabilities

- **Multi-Input Support**: Preset, custom, emoji, and text-based emotion input
- **Confidence Scoring**: Emotion detection confidence levels
- **Batch Processing**: Generate art for multiple emotions simultaneously
- **Color Conditioning**: Optional color palette integration into prompts
- **Comprehensive Metadata**: Detailed emotion analysis and generation tracking

## 🛠️ Installation & Setup

### Prerequisites

Ensure you have the base CompI environment set up with all dependencies from `requirements.txt`.

### Additional Dependencies

Phase 2.C uses existing CompI dependencies, specifically:

- `textblob>=0.17.0` - Sentiment analysis and emotion detection
- `emoji` (optional) - Enhanced emoji processing

### Optional Setup

For enhanced sentiment analysis, download the TextBlob corpora:

```bash
python -m textblob.download_corpora
```

## 🎯 Quick Start

### 1. Launch the Streamlit Interface

```bash
# Navigate to your CompI project directory
cd "C:\Users\Aksharajsinh\Documents\augment-projects\Project CompI"

# Run the Phase 2.C interface
streamlit run src/ui/compi_phase2c_streamlit_ui.py

# Or use the main CompI interface
streamlit run compi_complete_app.py
# Then select "🌀 Phase 2.C: Emotion-to-Image"
```

### 2. Using Preset Emotions

1. **Select "Preset Emotions"** as your input method
2. **Choose an emotion category** (Joy & Happiness, Love & Romance, etc.)
3. **Pick a specific emotion** from the category
4. **Enter your creative prompt** and style
5. **Generate** and watch your emotion transform into art!

### 3. Using Custom Emotions or Emojis

1. **Select "Custom Emotion/Emoji"** as your input method
2. **Type any emotion** (e.g., "contemplative", "bittersweet")
3. **Or use emojis** (🤩, 💫, 🌙) to express feelings
4. **Use quick emoji buttons** for common emotions
5. **Generate** emotion-infused artwork

### 4. Using Descriptive Text

1. **Select "Descriptive Text"** as your input method
2. **Describe your feeling** in natural language
3. **Example**: "I feel hopeful after the rain" or "There's anticipation in the air"
4. **AI analyzes sentiment** and extracts emotional context (a minimal sketch follows)
5. **Generate** art based on your emotional description
## 📚 Emotion Categories & Examples
|
| 87 |
+
|
| 88 |
+
### 🌟 Joy & Happiness
|
| 89 |
+
- **joyful**: Bright, radiant, effervescent artwork
|
| 90 |
+
- **ecstatic**: High-energy, explosive, vibrant creations
|
| 91 |
+
- **cheerful**: Light, uplifting, warm compositions
|
| 92 |
+
- **uplifting**: Inspiring, elevating, positive imagery
|
| 93 |
+
|
| 94 |
+
### 💙 Sadness & Melancholy
|
| 95 |
+
- **melancholic**: Wistful, contemplative, blue-toned art
|
| 96 |
+
- **nostalgic**: Memory-tinged, sepia-like, reflective pieces
|
| 97 |
+
- **somber**: Muted, serious, thoughtful compositions
|
| 98 |
+
- **wistful**: Longing, gentle sadness, soft imagery
|
| 99 |
+
|
| 100 |
+
### ❤️ Love & Romance
|
| 101 |
+
- **romantic**: Warm, tender, passionate artwork
|
| 102 |
+
- **loving**: Affectionate, caring, heart-centered pieces
|
| 103 |
+
- **passionate**: Intense, fiery, deep emotional art
|
| 104 |
+
- **tender**: Gentle, soft, intimate compositions
|
| 105 |
+
|
| 106 |
+
### 🕊️ Peace & Serenity
|
| 107 |
+
- **peaceful**: Calm, balanced, harmonious imagery
|
| 108 |
+
- **serene**: Tranquil, still, meditative artwork
|
| 109 |
+
- **tranquil**: Quiet, restful, soothing compositions
|
| 110 |
+
- **harmonious**: Balanced, unified, flowing pieces
|
| 111 |
+
|
| 112 |
+
### 🔮 Mystery & Drama
|
| 113 |
+
- **mysterious**: Enigmatic, shadowy, intriguing art
|
| 114 |
+
- **dramatic**: Bold, intense, theatrical compositions
|
| 115 |
+
- **enigmatic**: Puzzling, cryptic, thought-provoking pieces
|
| 116 |
+
- **suspenseful**: Tension-filled, anticipatory artwork
|
| 117 |
+
|
| 118 |
+
### ⚡ Energy & Power
|
| 119 |
+
- **energetic**: Dynamic, vibrant, high-movement art
|
| 120 |
+
- **powerful**: Strong, bold, commanding compositions
|
| 121 |
+
- **intense**: Deep, concentrated, focused imagery
|
| 122 |
+
- **fierce**: Wild, untamed, strong emotional pieces
|
| 123 |
+
|
| 124 |
+
## 🎨 Creative Workflow
|
| 125 |
+
|
| 126 |
+
### 1. Emotion Selection Strategy
|
| 127 |
+
- **Start with your current mood**: What are you feeling right now?
|
| 128 |
+
- **Consider the artwork's purpose**: What emotion should it evoke?
|
| 129 |
+
- **Match emotion to subject**: Align feelings with your prompt content
|
| 130 |
+
- **Experiment with intensity**: Try different emotional strengths
|
| 131 |
+
|
| 132 |
+
### 2. Prompt Engineering with Emotions
|
| 133 |
+
- **Base prompt**: Start with your core visual concept
|
| 134 |
+
- **Emotion integration**: Let the system enhance with emotional context
|
| 135 |
+
- **Style coordination**: Choose styles that complement your emotion
|
| 136 |
+
- **Atmosphere setting**: Use mood modifiers for deeper impact
|
| 137 |
+
|
| 138 |
+
### 3. Emotion-Style Combinations
|
| 139 |
+
|
| 140 |
+
| Emotion | Recommended Styles | Color Palettes | Atmosphere |
|
| 141 |
+
|---------|-------------------|----------------|------------|
|
| 142 |
+
| Joyful | impressionist, vibrant digital art | golds, oranges, bright blues | radiant, luminous |
|
| 143 |
+
| Melancholic | oil painting, watercolor | blues, grays, muted tones | contemplative, wistful |
|
| 144 |
+
| Romantic | soft digital art, renaissance | pinks, reds, warm tones | tender, passionate |
|
| 145 |
+
| Mysterious | dark fantasy, gothic | purples, blacks, deep blues | enigmatic, shadowy |
|
| 146 |
+
| Energetic | abstract, dynamic digital | bright colors, neons | electric, vibrant |
|
| 147 |
+
| Peaceful | minimalist, zen art | soft greens, blues, whites | serene, harmonious |
|
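At its core, this kind of combination is simple string composition. The sketch below is purely illustrative (the `EMOTION_STYLE_HINTS` table and `fuse_prompt` helper are hypothetical names, not CompI's actual internals), but it shows how a table like the one above can be fused into an enhanced prompt:

```python
# Hypothetical sketch of emotion-aware prompt fusion (not CompI's real code).
EMOTION_STYLE_HINTS = {
    "joyful": ("golds, oranges, bright blues", "radiant, luminous"),
    "melancholic": ("blues, grays, muted tones", "contemplative, wistful"),
    "mysterious": ("purples, blacks, deep blues", "enigmatic, shadowy"),
}

def fuse_prompt(base_prompt: str, style: str, emotion: str) -> str:
    """Append emotion-derived color and atmosphere cues to a base prompt."""
    colors, atmosphere = EMOTION_STYLE_HINTS.get(emotion, ("", ""))
    parts = [base_prompt, style]
    if atmosphere:
        parts.append(f"{atmosphere} atmosphere")
    if colors:
        parts.append(f"palette of {colors}")
    return ", ".join(p for p in parts if p)

print(fuse_prompt("A mystical forest", "digital painting", "mysterious"))
# A mystical forest, digital painting, enigmatic, shadowy atmosphere,
# palette of purples, blacks, deep blues
```
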
## 🔧 Advanced Usage

### Programmatic Access

```python
from src.generators.compi_phase2c_emotion_to_image import CompIPhase2CEmotionToImage

# Initialize generator
generator = CompIPhase2CEmotionToImage()

# Generate with preset emotion
results = generator.generate_image(
    text_prompt="A mystical forest",
    style="digital painting",
    emotion_input="mysterious",
    emotion_type="preset",
    enhancement_strength=0.8,
    num_images=2
)

# Generate with custom emotion
results = generator.generate_image(
    text_prompt="Urban landscape",
    style="cyberpunk",
    emotion_input="🤖",
    emotion_type="custom",
    enhancement_strength=0.6
)

# Generate with descriptive text
results = generator.generate_image(
    text_prompt="Mountain vista",
    style="landscape painting",
    emotion_input="I feel a sense of wonder and awe",
    emotion_type="text",
    contextual_text="Standing at the peak, overwhelmed by nature's beauty"
)
```

### Batch Processing

```python
# Process multiple emotions
emotions = ["joyful", "melancholic", "mysterious", "energetic"]
results = generator.batch_process_emotions(
    text_prompt="Abstract composition",
    style="modern art",
    emotions=emotions,
    enhancement_strength=0.7
)

# Color palette conditioning
results = generator.generate_emotion_palette_art(
    text_prompt="Flowing water",
    style="fluid art",
    emotion_input="peaceful",
    use_color_conditioning=True
)
```

## 📊 Understanding Emotion Analysis

Phase 2.C analyzes emotions across multiple dimensions:

### Emotion Detection
- **Primary Emotion**: Main detected emotion category
- **Confidence Score**: How certain the system is (0-1)
- **Secondary Emotions**: Related emotional states
- **Intensity Level**: Low, medium, or high emotional strength

### Sentiment Analysis
- **Polarity**: Negative (-1) to Positive (+1) sentiment
- **Subjectivity**: Objective (0) to Subjective (1) content
- **Keywords**: Emotion-related words detected in text
- **Emojis**: Emotional emojis found in input

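Since the analysis is TextBlob-based, the polarity and subjectivity scores above can be reproduced directly. A minimal sketch (the intensity thresholds here are illustrative, not CompI's exact values):

```python
# Minimal TextBlob sentiment reading; thresholds below are illustrative only.
from textblob import TextBlob

blob = TextBlob("I feel hopeful after the rain")
polarity = blob.sentiment.polarity          # -1.0 (negative) .. +1.0 (positive)
subjectivity = blob.sentiment.subjectivity  # 0.0 (objective) .. 1.0 (subjective)

# A simple, hypothetical bucketing into intensity levels:
intensity = "high" if abs(polarity) > 0.6 else "medium" if abs(polarity) > 0.2 else "low"
print(polarity, subjectivity, intensity)
```
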
### Artistic Mapping
- **Color Palette**: 3-5 colors representing the emotion
- **Artistic Descriptors**: Visual style words (vibrant, muted, etc.)
- **Mood Modifiers**: Atmospheric enhancements
- **Enhancement Tags**: Descriptive tags for the emotion

## 🎯 Tips for Best Results

### Emotion Selection Tips
1. **Be specific**: "melancholic" is more precise than "sad"
2. **Consider intensity**: Strong emotions create more dramatic art
3. **Match context**: Align emotions with your prompt's subject matter
4. **Experiment freely**: Try unexpected emotion-prompt combinations

### Prompt Enhancement Tips
1. **Start simple**: Let emotions enhance rather than complicate
2. **Trust the system**: Emotion analysis often captures nuances you might miss
3. **Adjust strength**: Use the enhancement slider to control emotional impact
4. **Combine thoughtfully**: Ensure emotions complement your artistic vision

### Style Coordination Tips
1. **Emotional styles**: Some styles naturally align with certain emotions
2. **Color harmony**: Consider how emotion colors work with your chosen style
3. **Atmospheric consistency**: Ensure mood modifiers enhance rather than conflict
4. **Intensity matching**: High-intensity emotions work well with bold styles

## 🔍 Troubleshooting

### Common Issues

**"Emotion not detected"**
- Try more specific emotion words
- Use descriptive phrases instead of single words
- Check for typos in emotion input

**"Weak emotional enhancement"**
- Increase the enhancement strength slider
- Use more emotionally charged language
- Try preset emotions for stronger effects

**"Conflicting emotional signals"**
- Simplify your emotional input
- Focus on one primary emotion
- Avoid mixing opposing emotions

### Performance Optimization
- Use preset emotions for fastest processing
- Shorter descriptive texts analyze faster
- Batch processing is more efficient for multiple emotions
- GPU acceleration improves generation speed

## 🚀 Next Steps

After mastering Phase 2.C, consider:
1. **Multimodal combinations**: Combine emotions with audio (Phase 2.A) or data (Phase 2.B)
2. **Emotional storytelling**: Create series of images with evolving emotions
3. **Personal emotion mapping**: Develop your own emotion-to-art style
4. **Therapeutic applications**: Use emotional art for self-expression and healing

---

**Ready to transform your emotions into art?** Launch the interface and start creating emotionally-rich artwork! 🌀🎨✨

docs/PHASE2D_REALTIME_DATA_TO_IMAGE_GUIDE.md
ADDED
@@ -0,0 +1,337 @@
# CompI Phase 2.D: Real-Time Data Feeds to Image Generation

## 🌎 Overview

Phase 2.D connects your art to the pulse of the world through real-time data feeds. This phase integrates live weather data, breaking news, financial markets, and other real-time information to create artwork that captures the current moment and reflects the world's dynamic state.

## ✨ Key Features

### 🌐 Real-Time Data Integration
- **Weather Data**: Live weather conditions from OpenWeatherMap API
- **News Headlines**: Breaking news from RSS feeds and NewsAPI
- **Financial Data**: Cryptocurrency prices and exchange rates
- **Social Trends**: Real-time social media and trending topics (extensible)
- **Custom RSS Feeds**: Support for any RSS/XML data source

### 🧠 Intelligent Context Processing
- **Data Summarization**: Automatic summarization of multiple data sources
- **Mood Detection**: Extract emotional context from real-time data
- **Theme Analysis**: Identify key themes and topics
- **Temporal Context**: Time-aware data processing and analysis
- **Artistic Inspiration**: Convert data patterns into creative prompts

### 🔧 Technical Capabilities
- **Data Caching**: Intelligent caching to respect API rate limits
- **Batch Processing**: Multiple data source configurations
- **Temporal Series**: Generate art evolution over time
- **Error Handling**: Robust fallback mechanisms for API failures
- **Comprehensive Metadata**: Detailed real-time context tracking

## 🛠️ Installation & Setup

### Prerequisites
Ensure you have the base CompI environment set up with all dependencies from `requirements.txt`.

### Additional Dependencies
Phase 2.D uses additional packages for real-time data processing:
```bash
pip install requests feedparser
```

### API Keys (Optional)
While Phase 2.D works with free data sources, you can enhance functionality with API keys:

#### OpenWeatherMap (Weather Data)
1. Sign up at [OpenWeatherMap](https://openweathermap.org/api)
2. Get your free API key (1000 calls/day)
3. Enter it in the interface or set it as an environment variable

#### NewsAPI (News Data)
1. Sign up at [NewsAPI](https://newsapi.org/)
2. Get your free API key (100 requests/day)
3. Enter it in the interface or set it as an environment variable

**Note**: Phase 2.D works without API keys using free RSS feeds and demo keys.

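For orientation, a weather lookup boils down to one HTTP request with `requests`. The sketch below targets OpenWeatherMap's public current-weather endpoint; the `fetch_weather` helper is illustrative, not CompI's internal fetcher, and `YOUR_API_KEY` is a placeholder:

```python
# Minimal sketch of a current-conditions lookup (not CompI's internal code).
import requests

def fetch_weather(city: str, api_key: str) -> dict:
    """Return a small weather summary for `city` from OpenWeatherMap."""
    resp = requests.get(
        "https://api.openweathermap.org/data/2.5/weather",
        params={"q": city, "appid": api_key, "units": "metric"},
        timeout=10,
    )
    resp.raise_for_status()
    data = resp.json()
    return {
        "conditions": data["weather"][0]["description"],
        "temp_c": data["main"]["temp"],
        "humidity": data["main"]["humidity"],
    }

print(fetch_weather("Paris", "YOUR_API_KEY"))
```
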
## 🎯 Quick Start

### 1. Launch the Interface

```bash
# Navigate to your CompI project directory
cd "C:\Users\Aksharajsinh\Documents\augment-projects\Project CompI"

# Run the Phase 2.D interface
streamlit run src/ui/compi_phase2d_streamlit_ui.py

# Or use the main CompI interface
streamlit run compi_complete_app.py
# Then select "🌎 Phase 2.D: Real-Time Data-to-Image"
```

### 2. Basic Real-Time Generation

1. **Enter your creative prompt** (e.g., "A cityscape reflecting today's energy")
2. **Choose your art style** (e.g., "cyberpunk digital art")
3. **Enable data sources** (Weather, News, or Financial)
4. **Configure data settings** (city for weather, news category, etc.)
5. **Generate** and watch real-time data transform into art!

### 3. Advanced Features

- **Batch Processing**: Generate multiple images with different data combinations
- **Temporal Series**: Create art evolution over time intervals
- **Context Strength**: Control how strongly real-time data influences the art
- **Data Preview**: See real-time context before generation

## 📚 Data Sources & Examples

### 🌤️ Weather Data Integration

#### Current Weather Conditions
```
# Example: Sunny weather in Paris
Weather Context: "Clear skies, 22°C, low humidity"
Artistic Influence: "bright and optimistic atmosphere"
Enhanced Prompt: "Parisian street scene, impressionist style, bright and optimistic atmosphere"
```

#### Weather Mood Mapping
- **Clear/Sunny**: Bright, optimistic, radiant
- **Cloudy**: Contemplative, soft, muted
- **Rainy**: Melancholic, reflective, dramatic
- **Stormy**: Intense, powerful, dynamic
- **Snowy**: Serene, peaceful, ethereal
- **Foggy**: Mysterious, ethereal, dreamlike

### 📰 News Data Integration

#### Breaking News Headlines
```
# Example: Technology news
Headlines: "AI breakthrough in medical research; New space mission launched"
Artistic Influence: "capturing the pulse of current events, inspired by innovation"
Enhanced Prompt: "Futuristic laboratory, sci-fi art, capturing innovation and discovery"
```

#### News Category Mapping
- **Technology**: Futuristic, innovative, digital
- **Science**: Discovery, exploration, analytical
- **World**: Global, diverse, interconnected
- **Business**: Dynamic, structured, professional
- **General**: Contemporary, relevant, timely

### 💹 Financial Data Integration

#### Market Conditions
```
# Example: Rising Bitcoin price
Financial Context: "Bitcoin: $45,000 USD, USD/EUR: 0.85"
Artistic Influence: "reflecting market dynamics and economic energy"
Enhanced Prompt: "Abstract composition, geometric art, reflecting economic energy and growth"
```

#### Market Mood Indicators
- **Rising Markets**: Energetic, upward, optimistic
- **Falling Markets**: Dramatic, intense, volatile
- **Stable Markets**: Balanced, steady, calm
- **High Volatility**: Dynamic, chaotic, electric

## 🎨 Creative Workflows

### 1. Moment Capture Workflow
**Goal**: Capture the current moment in artistic form

1. **Enable all data sources** (Weather + News + Financial)
2. **Use high context strength** (0.8-1.0)
3. **Choose responsive styles** (abstract, impressionist, contemporary)
4. **Generate immediately** to capture the current moment

### 2. Temporal Evolution Workflow
**Goal**: Show how the world changes over time

1. **Configure temporal series** (e.g., every 30 minutes)
2. **Use consistent prompt and style**
3. **Enable news feeds** for evolving content
4. **Create time-lapse art series**

### 3. Location-Based Workflow
**Goal**: Create art reflecting specific locations

1. **Enable weather data** for target city
2. **Use location-specific news** if available
3. **Choose appropriate styles** (landscape, urban, cultural)
4. **Incorporate local context** in prompts

### 4. Thematic Workflow
**Goal**: Focus on specific themes or topics

1. **Select relevant news categories** (technology, science, etc.)
2. **Use thematic prompts** aligned with data
3. **Adjust context strength** based on desired influence
4. **Create thematic art series**

## 🔧 Advanced Usage

### Programmatic Access

```python
from src.generators.compi_phase2d_realtime_to_image import CompIPhase2DRealTimeToImage

# Initialize generator
generator = CompIPhase2DRealTimeToImage()

# Generate with weather data
results = generator.generate_image(
    text_prompt="A landscape reflecting today's weather",
    style="impressionist painting",
    include_weather=True,
    weather_city="Tokyo",
    weather_api_key="your_api_key",  # Optional
    context_strength=0.8,
    num_images=2
)

# Generate with news data
results = generator.generate_image(
    text_prompt="Abstract representation of current events",
    style="modern digital art",
    include_news=True,
    news_category="technology",
    max_news=5,
    context_strength=0.7
)

# Generate with all data sources
results = generator.generate_image(
    text_prompt="The world's current state",
    style="surreal digital art",
    include_weather=True,
    weather_city="New York",
    include_news=True,
    news_category="world",
    include_financial=True,
    context_strength=0.9
)
```

### Batch Processing

```python
# Multiple data source configurations
data_configs = [
    {"include_weather": True, "weather_city": "London"},
    {"include_news": True, "news_category": "technology"},
    {"include_financial": True},
    {"include_weather": True, "include_news": True, "include_financial": True}
]

results = generator.batch_process_data_sources(
    text_prompt="Global perspectives",
    style="contemporary art",
    data_source_configs=data_configs,
    context_strength=0.7
)
```

### Temporal Series Generation

```python
# Generate art evolution over time
results = generator.generate_temporal_series(
    text_prompt="The changing world",
    style="abstract expressionism",
    data_config={
        "include_weather": True,
        "weather_city": "Paris",
        "include_news": True,
        "news_category": "general"
    },
    time_intervals=[0, 30, 60, 120],  # 0, 30min, 1hr, 2hr
    context_strength=0.8
)
```

## 📊 Understanding Real-Time Context

Phase 2.D processes real-time data across multiple dimensions:

### Data Processing Pipeline
1. **Data Fetching**: Retrieve data from multiple APIs and feeds
2. **Caching**: Store data to respect rate limits and improve performance
3. **Analysis**: Extract mood indicators, themes, and patterns
4. **Summarization**: Create concise summaries of current context
5. **Artistic Translation**: Convert data insights into creative prompts

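On the news side, the free-RSS path of this pipeline uses `feedparser`, which is already listed in the dependencies. A minimal sketch of the fetching step (the helper name and the BBC feed URL are just examples, not CompI's configuration):

```python
# Illustrative RSS fetch step; any RSS/Atom feed URL works here.
import feedparser

def fetch_headlines(feed_url: str, max_items: int = 5) -> list[str]:
    """Return up to `max_items` entry titles from an RSS/Atom feed."""
    feed = feedparser.parse(feed_url)
    return [entry.title for entry in feed.entries[:max_items]]

headlines = fetch_headlines("http://feeds.bbci.co.uk/news/technology/rss.xml")
print("; ".join(headlines))
```
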
### Context Components
- **Summary**: Concise description of all data sources
- **Mood Indicators**: Emotional context derived from data
- **Key Themes**: Main topics and subjects identified
- **Temporal Context**: Time-aware contextual information
- **Artistic Inspiration**: Creative interpretation for prompt enhancement

### Context Strength Levels
- **High (0.7-1.0)**: Strong data influence, detailed context integration
- **Medium (0.4-0.6)**: Moderate data influence, balanced integration
- **Low (0.1-0.3)**: Subtle data influence, minimal context addition

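One simple way to think about these levels: strength decides how much of the derived context actually reaches the prompt. The helper below is a hypothetical illustration of that idea, not the exact logic inside `CompIPhase2DRealTimeToImage`:

```python
# Hypothetical illustration of strength-gated context injection.
def apply_context(prompt: str, context_phrases: list[str], strength: float) -> str:
    """Append more or fewer context phrases depending on strength (0-1)."""
    k = round(strength * len(context_phrases))  # higher strength -> more phrases
    return ", ".join([prompt] + context_phrases[:k])

phrases = ["stormy atmosphere", "headlines of discovery", "rising market energy"]
print(apply_context("A cityscape", phrases, 0.9))  # strong data influence
print(apply_context("A cityscape", phrases, 0.3))  # subtle influence
```
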
## 🎯 Tips for Best Results

### Data Source Selection
1. **Weather**: Best for location-specific, atmospheric art
2. **News**: Ideal for contemporary, socially-relevant themes
3. **Financial**: Great for abstract, dynamic, economic themes
4. **Combined**: Use multiple sources for rich, complex context

### Prompt Engineering
1. **Responsive prompts**: Use prompts that can adapt to data context
2. **Flexible styles**: Choose styles that work with various moods
3. **Context awareness**: Consider how data might influence your vision
4. **Temporal relevance**: Use time-aware language when appropriate

### Context Strength Guidelines
1. **High strength**: When data should drive the artistic direction
2. **Medium strength**: For balanced data-art integration
3. **Low strength**: When data should provide subtle inspiration
4. **Variable strength**: Experiment to find optimal balance

## 🔍 Troubleshooting

### Common Issues

**"No real-time data available"**
- Check internet connection
- Verify API keys if using premium features
- Try different data sources
- Check API rate limits

**"API connection failed"**
- Verify API keys are correct
- Check if APIs are operational
- Try using free RSS feeds instead
- Reduce request frequency

**"Weak data influence"**
- Increase context strength
- Use more responsive prompts
- Enable multiple data sources
- Check data quality and relevance

### Performance Optimization
- Use data caching to reduce API calls
- Enable only needed data sources
- Use appropriate context strength levels
- Monitor API rate limits and usage

## 🚀 Next Steps

After mastering Phase 2.D, consider:
1. **Multimodal Fusion**: Combine real-time data with emotions (2.C) or audio (2.A)
2. **Custom Data Sources**: Add your own RSS feeds or APIs
3. **Temporal Art Projects**: Create long-term data evolution series
4. **Location-Based Art**: Develop city or region-specific art projects
5. **News Art Automation**: Set up automated news-driven art generation

---

**Ready to connect your art to the world's pulse?** Launch the interface and start creating real-time responsive artwork! 🌎📡🎨

docs/PHASE2E_STYLE_REFERENCE_GUIDE.md
ADDED
@@ -0,0 +1,271 @@
# CompI Phase 2.E: Style Reference/Example Image to AI Art - User Guide

## 🎨 Transform Any Image into AI Art Guidance

Phase 2.E allows you to use **any reference image** (from your device or the web) to guide the style, mood, and composition of your AI-generated art. Upload local files or paste URLs from Google Images, Pinterest, Instagram, or any other source!

## 🚀 Quick Start

### 1. **Launch the Application**
```bash
streamlit run src/ui/compi_phase2e_streamlit_ui.py
```

### 2. **Basic Workflow**
1. **Enter your text prompt** - Describe what you want to create
2. **Add reference image** - Upload file or paste web URL
3. **Review AI suggestions** - See automatic style analysis
4. **Adjust settings** - Control reference strength and parameters
5. **Generate art** - Create AI art guided by your reference
6. **Download results** - Save images with full metadata

## 📸 Reference Image Sources

### **Supported Input Methods**

#### 🖼️ **Local File Upload**
- **Formats**: PNG, JPG, JPEG, BMP, TIFF, WebP
- **Size Limit**: Up to 10MB per file
- **Quality**: Higher resolution = better style analysis

#### 🌐 **Web URL Input**
- **Google Images**: Right-click → "Copy image address"
- **Pinterest**: Click image → Copy URL from address bar
- **Instagram**: Use direct image links
- **Art Websites**: DeviantArt, ArtStation, Behance
- **Any Website**: Direct image URLs (.jpg, .png, etc.)

### **URL Examples**
```
✅ Good URLs:
https://example.com/artwork.jpg
https://pinterest.com/pin/123456789/
https://images.unsplash.com/photo-123/image.jpg

❌ Avoid:
https://website.com/gallery-page (not direct image)
https://social-media.com/post/123 (post page, not image)
```

## 🎛️ Interface Guide

### **Main Controls**

#### **Text Input Section**
- **Main Prompt**: Primary description of what you want to generate
- **Style Keywords**: Additional artistic style descriptors
- **Mood/Atmosphere**: Emotional tone and feeling

#### **Reference Image Section**
- **Upload Tab**: Drag & drop or browse for local files
- **URL Tab**: Paste any web image URL
- **Analysis Display**: Real-time style analysis and suggestions

#### **Generation Settings**
- **Reference Strength**: How closely to follow the reference (0.1-0.9)
- **Number of Images**: Generate 1-4 variations
- **Quality Settings**: Inference steps and guidance scale
- **Seed Control**: Random or fixed for reproducibility

### **AI Style Analysis**

When you load a reference image, the AI automatically analyzes:

- **Visual Properties**: Brightness, contrast, color distribution
- **Style Characteristics**: Artistic technique, complexity, mood
- **Suggested Keywords**: Automatically generated style descriptors
- **Enhancement Options**: One-click addition to your prompt

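To make "Visual Properties" concrete: measurements like brightness and contrast can be read straight off the pixels with Pillow. The sketch below illustrates that kind of analysis; it is not the exact CompI code, and `reference.jpg` is a placeholder path:

```python
# Illustrative low-level image measurement (not CompI's actual analyzer).
from PIL import Image, ImageStat

def basic_properties(path: str) -> dict:
    """Compute rough brightness and contrast for an image."""
    img = Image.open(path).convert("L")   # grayscale for luminance statistics
    stat = ImageStat.Stat(img)
    return {
        "brightness": stat.mean[0] / 255.0,   # 0 = black, 1 = white
        "contrast": stat.stddev[0] / 255.0,   # spread of pixel values
    }

print(basic_properties("reference.jpg"))
```
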
## ⚙️ Settings Guide

### **Reference Strength Control**

The reference strength determines how closely your generated art follows the reference image:

| Strength | Effect | Best For |
|----------|--------|----------|
| **0.1-0.3** | Loose inspiration, high creativity | Abstract concepts, creative freedom |
| **0.4-0.6** | Balanced style transfer | Most use cases, artistic guidance |
| **0.7-0.9** | Close adherence to reference | Style mimicking, specific looks |

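For readers who want to see the mechanics, reference-guided generation of this kind is typically built on Stable Diffusion img2img. The sketch below makes that assumption; note that diffusers' `strength` parameter means how far to move *away* from the init image, so a CompI-style reference strength maps roughly onto its inverse. This is an illustration under stated assumptions, not CompI's documented implementation:

```python
# Minimal img2img sketch; the strength inversion is an assumption, not CompI code.
import torch
from diffusers import StableDiffusionImg2ImgPipeline
from PIL import Image

pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

reference = Image.open("reference.jpg").convert("RGB").resize((512, 512))
reference_strength = 0.7  # "how closely to follow the reference"

image = pipe(
    prompt="a serene mountain landscape, oil painting",
    image=reference,
    strength=1.0 - reference_strength,  # invert for diffusers' semantics
    guidance_scale=10.0,
).images[0]
image.save("styled_output.png")
```
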
### **Quality vs Speed Settings**

| Setting | Fast (10-15 steps) | Balanced (20-30 steps) | High Quality (40-50 steps) |
|---------|-------------------|------------------------|---------------------------|
| **Time** | 30-60 seconds | 1-2 minutes | 3-5 minutes |
| **Quality** | Good for testing | Recommended | Best results |
| **Use Case** | Quick iterations | Final generation | Professional work |

### **Guidance Scale**

| Scale | Effect | Best For |
|-------|--------|----------|
| **5-10** | More creative, loose interpretation | Artistic freedom, abstract art |
| **10-15** | Balanced prompt following | Most use cases |
| **15-20** | Strict prompt adherence | Precise requirements |

## 🎨 Creative Techniques

### **Effective Reference Selection**

#### ✅ **Good References**
- **Clear artistic style** (paintings, digital art, photography styles)
- **Strong visual identity** (distinctive colors, techniques, moods)
- **Good composition** (well-balanced, not cluttered)
- **High contrast** (clear light/dark areas)

#### ❌ **Challenging References**
- **Cluttered images** with too many elements
- **Low contrast** or very dark/bright images
- **Screenshots** or UI elements
- **Text-heavy** images

### **Prompt Enhancement Tips**

#### **Combine Multiple Styles**
```
Base: "A serene mountain landscape"
+ Style: "oil painting, impressionist"
+ Mood: "golden hour, peaceful"
+ AI Suggestions: "soft brushstrokes, warm colors"
= Enhanced: "A serene mountain landscape, oil painting, impressionist, golden hour, peaceful, soft brushstrokes, warm colors"
```

#### **Layer Your Descriptions**
1. **Subject**: What you want to see
2. **Style**: Artistic technique or medium
3. **Mood**: Emotional atmosphere
4. **Details**: Specific elements or effects

### **Reference Strength Strategies**

#### **Creative Exploration** (Low Strength: 0.2-0.4)
- Use reference for general mood/color inspiration
- Allow AI maximum creative freedom
- Good for abstract or conceptual art

#### **Style Transfer** (Medium Strength: 0.5-0.7)
- Balance between reference and creativity
- Maintain reference style while changing content
- Most versatile approach

#### **Style Mimicking** (High Strength: 0.7-0.9)
- Close adherence to reference technique
- Minimal creative deviation
- Good for specific artistic styles

## 📁 Output Management

### **File Naming Convention**

Generated files follow a comprehensive naming pattern:
```
{prompt}_{style}_{mood}_{timestamp}_seed{number}_{REFIMG|NOREFIMG}_v{variation}.png
```

**Example:**
```
magical_forest_fantasy_mystical_20250701_143022_seed12345_REFIMG_v1.png
```

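A hypothetical helper that assembles this pattern (the real CompI code may slug and truncate differently):

```python
# Illustrative filename builder for the documented pattern.
from datetime import datetime

def output_filename(prompt: str, style: str, mood: str, seed: int,
                    has_reference: bool, variation: int) -> str:
    """Build {prompt}_{style}_{mood}_{timestamp}_seed{n}_{REFIMG|NOREFIMG}_v{v}.png"""
    slug = lambda s: "_".join(s.lower().split())[:20]  # crude, illustrative slugify
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    ref_tag = "REFIMG" if has_reference else "NOREFIMG"
    return f"{slug(prompt)}_{slug(style)}_{slug(mood)}_{timestamp}_seed{seed}_{ref_tag}_v{variation}.png"

print(output_filename("magical forest", "fantasy", "mystical", 12345, True, 1))
```
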
### **Metadata Files**

Each image includes a JSON metadata file with:
- Complete generation parameters
- Reference image information
- AI style analysis results
- Reproducibility data

### **Organization Tips**

- **Create project folders** for different art series
- **Use consistent naming** for easy searching
- **Save metadata** for reproducing successful results
- **Export favorites** to separate collections

## 🔧 Troubleshooting

### **Common Issues**

#### **"Failed to load image from URL"**
- ✅ Check URL is a direct image link
- ✅ Try right-clicking image → "Copy image address"
- ✅ Ensure URL ends with .jpg, .png, etc.
- ✅ Test URL in browser first

#### **"Generation taking too long"**
- ✅ Reduce inference steps (try 15-20)
- ✅ Lower image count (try 1-2 images)
- ✅ Check GPU memory availability
- ✅ Restart application if needed

#### **"Poor quality results"**
- ✅ Increase inference steps (try 30-40)
- ✅ Adjust reference strength
- ✅ Improve prompt specificity
- ✅ Try different reference images

#### **"Out of memory errors"**
- ✅ Enable memory optimizations in settings
- ✅ Reduce batch size to 1 image
- ✅ Close other applications
- ✅ Use CPU mode if necessary

### **Performance Optimization**

#### **For Better Speed**
- Use GPU if available
- Enable memory optimizations
- Start with lower inference steps
- Generate fewer images per batch

#### **For Better Quality**
- Use high-resolution reference images
- Increase inference steps (30-50)
- Fine-tune reference strength
- Use descriptive, specific prompts

## 🎯 Best Practices

### **Reference Image Selection**
1. **Choose clear, high-quality images**
2. **Match the style you want to achieve**
3. **Consider color palette and mood**
4. **Test different reference strengths**

### **Prompt Writing**
1. **Be specific about desired elements**
2. **Use artistic terminology**
3. **Describe lighting and atmosphere**
4. **Combine multiple style keywords**

### **Iterative Improvement**
1. **Start with medium reference strength**
2. **Generate multiple variations**
3. **Adjust settings based on results**
4. **Save successful parameter combinations**

### **Workflow Efficiency**
1. **Prepare reference images in advance**
2. **Use consistent naming conventions**
3. **Save metadata for reproducibility**
4. **Organize outputs by project/style**

## 🚀 Advanced Tips

### **Multi-Style Blending**
- Use reference for base style
- Add contrasting style keywords
- Experiment with different strengths

### **Series Creation**
- Use same reference with different prompts
- Maintain consistent style across images
- Vary only specific elements

### **Style Evolution**
- Start with high reference strength
- Gradually reduce for more creativity
- Create progression series

This guide provides everything you need to master CompI Phase 2.E and create stunning AI art guided by any reference image!

docs/PHASE3E_PERFORMANCE_GUIDE.md
ADDED
@@ -0,0 +1,301 @@
# ⚙️ CompI Phase 3.E: Performance, Model Management & Reliability - Complete Guide

## 🎯 **What Phase 3.E Delivers**

**Phase 3.E transforms CompI into a production-grade platform with professional performance management, intelligent reliability, and advanced model capabilities.**

### **🤖 Model Manager**
- **Dynamic Model Switching**: Switch between SD 1.5 and SDXL based on requirements
- **Auto-Availability Checking**: Intelligent detection of model compatibility and VRAM requirements
- **Universal LoRA Support**: Load and scale LoRA weights across all models and generation modes
- **Smart Recommendations**: Hardware-based model suggestions and optimization advice

### **⚡ Performance Controls**
- **xFormers Integration**: Memory-efficient attention with automatic fallback
- **Advanced Memory Optimization**: Attention slicing, VAE slicing/tiling, CPU offloading
- **Precision Control**: Automatic dtype selection (fp16/bf16/fp32) based on hardware
- **Batch Optimization**: Memory-aware batch processing with intelligent sizing

### **📊 VRAM Monitoring**
- **Real-time Tracking**: Live GPU memory usage monitoring and alerts
- **Usage Analytics**: Memory usage patterns and optimization suggestions
- **Threshold Warnings**: Automatic alerts when approaching memory limits
- **Cache Management**: Intelligent GPU cache clearing and memory cleanup

### **🛡️ Reliability Engine**
- **OOM-Safe Generation**: Automatic retry with progressive fallback strategies
- **Intelligent Fallbacks**: Reduce size → reduce steps → CPU fallback progression
- **Error Classification**: Smart error detection and appropriate response strategies
- **Graceful Degradation**: Maintain functionality even under resource constraints

### **📦 Batch Processing**
- **Seed-Controlled Batches**: Deterministic seed sequences for reproducible results
- **Memory-Aware Batching**: Automatic batch size optimization based on available VRAM
- **Progress Tracking**: Detailed progress monitoring with per-image status
- **Failure Recovery**: Continue batch processing even if individual images fail

### **🔍 Upscaler Integration**
- **Latent Upscaler**: Optional 2x upscaling using Stable Diffusion Latent Upscaler
- **Graceful Degradation**: Clean fallback when upscaler unavailable
- **Memory Management**: Intelligent memory allocation for upscaling operations
- **Quality Enhancement**: Professional-grade image enhancement capabilities

---

## 🚀 **Quick Start Guide**

### **1. Launch Phase 3.E**
```bash
# Method 1: Using launcher script (recommended)
python run_phase3e_performance_manager.py

# Method 2: Direct Streamlit launch
streamlit run src/ui/compi_phase3e_performance_manager.py --server.port 8505
```

### **2. System Requirements Check**
The launcher automatically checks:
- **GPU Setup**: CUDA availability and VRAM capacity
- **Dependencies**: Required and optional packages
- **Model Support**: SD 1.5 and SDXL availability
- **Performance Features**: xFormers and upscaler support

### **3. Access the Interface**
- **URL:** `http://localhost:8505`
- **Interface:** Professional Streamlit dashboard with real-time monitoring
- **Sidebar:** Live VRAM monitoring and system status

---

## 🎨 **Professional Workflow**

### **Step 1: Model Selection**
1. **Choose Base Model**: SD 1.5 (fast, compatible) or SDXL (high quality, more VRAM)
2. **Select Generation Mode**: txt2img or img2img
3. **Check Compatibility**: System automatically validates model/mode combinations
4. **Review VRAM Requirements**: See memory requirements and availability status

### **Step 2: LoRA Integration (Optional)**
1. **Enable LoRA**: Toggle LoRA support
2. **Specify Path**: Enter path to LoRA weights (diffusers format)
3. **Set Scale**: Adjust LoRA influence (0.1-2.0)
4. **Verify Status**: Check LoRA loading status and compatibility

### **Step 3: Performance Optimization**
1. **Choose Optimization Level**: Conservative, Balanced, Aggressive, or Extreme
2. **Monitor VRAM**: Watch real-time memory usage in sidebar
3. **Adjust Settings**: Fine-tune individual optimization features
4. **Enable Reliability**: Configure OOM retry and CPU fallback options

### **Step 4: Generation**
1. **Single Images**: Generate individual images with full control
2. **Batch Processing**: Create multiple images with seed sequences (see the sketch below)
3. **Monitor Progress**: Track generation progress and memory usage
4. **Review Results**: Analyze generation statistics and performance metrics

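Seed-controlled batching has a simple core: derive a deterministic seed sequence and hand each seed to the pipeline as a `torch.Generator`. A minimal diffusers sketch (illustrative; Phase 3.E's batch manager adds memory-aware sizing and failure recovery on top):

```python
# Minimal seed-controlled batch; prompt and filenames are illustrative.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

base_seed, batch_size = 12345, 4
seeds = [base_seed + i for i in range(batch_size)]  # deterministic sequence

for seed in seeds:
    generator = torch.Generator(device="cuda").manual_seed(seed)
    image = pipe("a misty harbor at dawn", generator=generator).images[0]
    image.save(f"batch_seed{seed}.png")  # seed in the name makes reruns reproducible
```
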
---

## 🔧 **Advanced Features**

### **🤖 Model Manager Deep Dive**

#### **Model Compatibility Matrix**
```
SD 1.5:
  ✅ txt2img (512x512 optimal)
  ✅ img2img (all strengths)
  ✅ ControlNet (full support)
  ✅ LoRA (universal compatibility)
  💾 VRAM: 4+ GB recommended

SDXL:
  ✅ txt2img (1024x1024 optimal)
  ✅ img2img (limited support)
  ⚠️ ControlNet (requires special handling)
  ✅ LoRA (SDXL-compatible weights only)
  💾 VRAM: 8+ GB recommended
```

#### **Automatic Model Selection Logic**
- **VRAM < 6GB**: Recommends SD 1.5 only
- **VRAM 6-8GB**: SD 1.5 preferred, SDXL with warnings
- **VRAM 8GB+**: Full SDXL support with optimizations
- **CPU Mode**: SD 1.5 only with aggressive optimizations

### **⚡ Performance Optimization Levels**

#### **Conservative Mode**
- Basic attention slicing
- Standard precision (fp16/fp32)
- Minimal memory optimizations
- **Best for**: Stable systems, first-time users

#### **Balanced Mode (Default)**
- xFormers attention (if available)
- Attention + VAE slicing
- Automatic precision selection
- **Best for**: Most users, good performance/stability balance

#### **Aggressive Mode**
- All memory optimizations enabled
- VAE tiling for large images
- Maximum memory efficiency
- **Best for**: Limited VRAM, large batch processing

#### **Extreme Mode**
- CPU offloading enabled
- Maximum memory savings
- Slower but uses minimal VRAM
- **Best for**: Very limited VRAM (<4GB)

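These levels map onto standard diffusers switches. The grouping below into Balanced/Aggressive/Extreme follows the descriptions above and is a sketch, but the individual calls are stock diffusers APIs (in practice you would enable one level, not stack all of them):

```python
# Sketch of the diffusers calls behind each optimization level.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
)

# Balanced: memory-efficient attention plus slicing.
try:
    pipe.enable_xformers_memory_efficient_attention()  # big win when xFormers is installed
except Exception:
    pipe.enable_attention_slicing()  # conservative fallback without xFormers
pipe.enable_vae_slicing()

# Aggressive: additionally tile the VAE so large images decode in chunks.
pipe.enable_vae_tiling()

# Extreme: offload submodules to CPU between calls (slowest, minimal VRAM).
# Requires `accelerate`; skip pipe.to("cuda") when using CPU offload.
pipe.enable_model_cpu_offload()
```
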
### **🛡️ Reliability Engine Strategies**

#### **Fallback Progression**
```
Strategy 1: Original settings (100% size, 100% steps)
Strategy 2: Reduced size (75% size, 90% steps)
Strategy 3: Half size (50% size, 80% steps)
Strategy 4: Minimal (50% size, 60% steps)
Final: CPU fallback if all GPU attempts fail
```

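In code, this progression is a retry ladder. The sketch below is a simplified illustration: `generate` stands in for the actual pipeline call, and the fractions mirror the strategies above:

```python
# Simplified OOM retry ladder; `generate` is a placeholder for the pipeline call.
import torch

FALLBACKS = [(1.00, 1.00), (0.75, 0.90), (0.50, 0.80), (0.50, 0.60)]  # (size, steps)

def generate_with_fallback(generate, width, height, steps):
    """Walk the fallback ladder until a generation succeeds."""
    for size_frac, step_frac in FALLBACKS:
        try:
            w = int(width * size_frac) // 8 * 8   # SD dimensions must be multiples of 8
            h = int(height * size_frac) // 8 * 8
            return generate(w, h, max(1, int(steps * step_frac)), device="cuda")
        except torch.cuda.OutOfMemoryError:
            torch.cuda.empty_cache()              # reclaim VRAM before the next attempt
    return generate(width, height, steps, device="cpu")  # last resort: CPU fallback
```
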
#### **Error Classification**
- **CUDA OOM**: Triggers progressive fallback
- **Model Loading**: Suggests alternative models
- **LoRA Errors**: Disables LoRA and retries
- **General Errors**: Logs and reports with context

### **📊 VRAM Monitoring System**

#### **Real-time Metrics**
- **Total VRAM**: Hardware capacity
- **Used VRAM**: Currently allocated memory
- **Free VRAM**: Available for new operations
- **Usage Percentage**: Current utilization level

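These metrics come straight from `torch.cuda`; a minimal reading looks like this (illustrative helper, not the dashboard's exact code):

```python
# Minimal VRAM snapshot using stock torch.cuda APIs.
import torch

def vram_snapshot() -> dict:
    """Return free/used/total VRAM in GB plus a usage percentage for device 0."""
    free_b, total_b = torch.cuda.mem_get_info()   # bytes free / total on the device
    used_b = total_b - free_b
    return {
        "total_gb": total_b / 1e9,
        "used_gb": used_b / 1e9,
        "free_gb": free_b / 1e9,
        "usage_pct": 100.0 * used_b / total_b,
    }

if torch.cuda.is_available():
    print(vram_snapshot())
    torch.cuda.empty_cache()  # the "Clear GPU Cache" button boils down to this
```
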
#### **Smart Alerts**
- **Green (0-60%)**: Optimal usage
- **Yellow (60-80%)**: Moderate usage, monitor closely
- **Red (80%+)**: High usage, optimization recommended

#### **Memory Management**
- **Automatic Cache Clearing**: Between batch generations
- **Memory Leak Detection**: Identifies and resolves memory issues
- **Optimization Suggestions**: Hardware-specific recommendations

---

## 📈 **Performance Benchmarks**

### **Generation Speed Comparison**
```
SD 1.5 (512x512, 20 steps):
  RTX 4090: ~15-25 seconds
  RTX 3080: ~25-35 seconds
  RTX 2080: ~45-60 seconds
  CPU: ~5-10 minutes

SDXL (1024x1024, 20 steps):
  RTX 4090: ~30-45 seconds
  RTX 3080: ~60-90 seconds
  RTX 2080: ~2-3 minutes (with optimizations)
  CPU: ~15-30 minutes
```

### **Memory Usage Patterns**
```
SD 1.5:
  Base: ~3.5GB VRAM
  + LoRA: ~3.7GB VRAM
  + Upscaler: ~5.5GB VRAM

SDXL:
  Base: ~6.5GB VRAM
  + LoRA: ~7.0GB VRAM
  + Upscaler: ~9.0GB VRAM
```

---

## 🔍 **Troubleshooting Guide**

### **Common Issues & Solutions**

#### **"CUDA Out of Memory" Errors**
1. **Enable OOM Auto-Retry**: Automatic fallback handling
2. **Reduce Image Size**: Use 512x512 instead of 1024x1024
3. **Lower Batch Size**: Generate fewer images simultaneously
4. **Enable Aggressive Optimizations**: Use VAE slicing/tiling
5. **Clear GPU Cache**: Use sidebar "Clear GPU Cache" button

#### **Slow Generation Speed**
1. **Enable xFormers**: Significant speed improvement if available
2. **Use Balanced Optimization**: Good speed/quality trade-off
3. **Reduce Inference Steps**: 15-20 steps often sufficient
4. **Check VRAM Usage**: Ensure not hitting memory limits

#### **Model Loading Failures**
1. **Check Internet Connection**: Models download on first use
2. **Verify Disk Space**: Models require 2-7GB storage each
3. **Try Alternative Model**: Switch between SD 1.5 and SDXL
4. **Clear Model Cache**: Remove cached models and re-download

#### **LoRA Loading Issues**
1. **Verify Path**: Ensure LoRA files exist at specified path
2. **Check Format**: Use diffusers-compatible LoRA weights
3. **Model Compatibility**: Ensure LoRA matches base model type
4. **Scale Adjustment**: Try different LoRA scale values

---

## 🎯 **Best Practices**

### **📝 Performance Optimization**
1. **Start Conservative**: Begin with balanced settings, adjust as needed
2. **Monitor VRAM**: Keep usage below 80% for stability
3. **Batch Wisely**: Use smaller batches on limited hardware
4. **Clear Cache Regularly**: Prevent memory accumulation

### **🤖 Model Selection**
1. **SD 1.5 for Speed**: Faster generation, lower VRAM requirements
2. **SDXL for Quality**: Higher resolution, better detail
3. **Match Hardware**: Choose model based on available VRAM
4. **Test Compatibility**: Verify model works with your use case

### **🛡️ Reliability**
1. **Enable Auto-Retry**: Let system handle OOM errors automatically
2. **Use Fallbacks**: Allow progressive degradation for reliability
3. **Monitor Logs**: Check run logs for patterns and issues
4. **Plan for Failures**: Design workflows that handle generation failures

---

## 🚀 **Integration with CompI Ecosystem**

### **Universal Enhancement**
Phase 3.E enhances ALL existing CompI components:
- **Ultimate Dashboard**: Model switching and performance controls
- **Phase 2.A-2.E**: Reliability and optimization for all multimodal phases
- **Phase 1.A-1.E**: Enhanced foundation with professional features
- **Phase 3.D**: Performance metrics in workflow management

### **Backward Compatibility**
- **Graceful Degradation**: Works on all hardware configurations
- **Default Settings**: Optimal defaults for most users
- **Progressive Enhancement**: Advanced features when available
- **Legacy Support**: Maintains compatibility with existing workflows

---

## 🎉 **Phase 3.E: Production-Grade CompI Complete**

**Phase 3.E completes CompI's evolution into a production-grade platform, pairing professional performance management with intelligent reliability and advanced model capabilities.**

**Key Benefits:**
- ✅ **Professional Performance**: Industry-standard optimization and monitoring
- ✅ **Intelligent Reliability**: Automatic error handling and recovery
- ✅ **Advanced Model Management**: Dynamic switching and LoRA integration
- ✅ **Production Ready**: Suitable for commercial and professional use
- ✅ **Universal Enhancement**: Improves all existing CompI features

**CompI is now a complete, production-grade multimodal AI art generation platform!** 🎨✨

docs/PHASE3_FINAL_DASHBOARD_GUIDE.md
ADDED
@@ -0,0 +1,274 @@
# 🧪 CompI Phase 3 Final Dashboard - Complete Integration Guide

## 🎯 **What This Delivers**

**The Phase 3 Final Dashboard is the ultimate CompI interface that integrates ALL Phase 3 components into a single, unified creative environment.**

### **🚀 Complete Feature Integration:**

#### **🧩 Phase 3.A/3.B: True Multimodal Fusion**

- **Real Audio Processing**: Whisper transcription + librosa feature analysis
- **Actual Data Analysis**: CSV processing + mathematical formula evaluation
- **Sentiment Analysis**: TextBlob emotion detection with polarity scoring
- **Live Real-time Data**: Weather API + RSS news feed integration
- **Intelligent Fusion**: All inputs combined into enhanced prompts

#### **🖼️ Phase 3.C: Advanced References**

- **Multi-Reference Support**: Upload files + paste URLs simultaneously
- **Role-Based Assignment**: Separate style vs structure reference selection
- **Live ControlNet Previews**: Real-time Canny/Depth map generation
- **Hybrid Generation**: CN+I2I with intelligent fallback to a two-pass approach
- **Professional Controls**: Fine-grained parameter control for all aspects

#### **⚙️ Phase 3.E: Performance Management**

- **Model Switching**: SD 1.5 ↔ SDXL with automatic availability checking
- **LoRA Integration**: Load and scale LoRA weights with visual feedback
- **Performance Optimizations**: xFormers, attention slicing, VAE optimizations
- **VRAM Monitoring**: Real-time GPU memory usage tracking
- **OOM Recovery**: Progressive fallback with intelligent retry strategies
- **Optional Upscaling**: Latent upscaler integration for quality enhancement

#### **🎛️ Phase 3.D: Professional Workflow**

- **Advanced Gallery**: Image filtering by mode, prompt, and steps with a visual grid
- **Annotation System**: Ratings (1-5), tags, and notes for comprehensive organization
- **Preset Management**: Save/load complete generation configurations
- **Export Bundles**: Complete ZIP packages with images, metadata, annotations, and presets

---

## 🏗️ **Architecture Overview**

### **7-Tab Unified Interface:**

```python
1. 🧩 Inputs (Text/Audio/Data/Emotion/Real‑time)  # Phase 3.A/3.B
2. 🖼️ Advanced References                         # Phase 3.C
3. ⚙️ Model & Performance                          # Phase 3.E
4. 🎛️ Generate                                    # Unified generation
5. 🖼️ Gallery & Annotate                          # Phase 3.D
6. 💾 Presets                                      # Phase 3.D
7. 📦 Export                                       # Phase 3.D
```

### **Intelligent Generation Modes:**

```python
# Smart mode selection based on available inputs:
mode = "T2I"                                # Text-to-Image (baseline)
if have_cn and have_style: mode = "CN+I2I"  # Hybrid ControlNet + Img2Img
elif have_cn: mode = "CN"                   # ControlNet only
elif have_style: mode = "I2I"               # Img2Img only
```

### **Real-time Performance Monitoring:**

```python
# Live VRAM tracking in header
colA: Device (CUDA/CPU)
colB: Total VRAM (GB)
colC: Used VRAM (GB)
colD: PyTorch version + status
```
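
A plausible sketch of how the header metrics can be computed (the dashboard's own implementation may differ):

```python
import torch

def vram_metrics():
    """Return (total_gb, used_gb) for the header meters; zeros when CPU-only."""
    if not torch.cuda.is_available():
        return 0.0, 0.0
    total = torch.cuda.get_device_properties(0).total_memory / 1024**3
    used = torch.cuda.memory_allocated(0) / 1024**3
    return total, used
```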

---

## 🎨 **Professional Workflow**

### **Complete Creative Process:**

#### **1. Configure Multimodal Inputs (Tab 1)**

- **Text & Style**: Main prompt, artistic style, mood, negative prompt
- **Audio Analysis**: Upload audio → Whisper transcription → librosa features
- **Data Processing**: CSV upload or mathematical formulas → visualization
- **Emotion Analysis**: Sentiment analysis with TextBlob polarity scoring
- **Real-time Feeds**: Weather data + news headline integration

#### **2. Advanced References (Tab 2)**

- **Multi-Reference Upload**: Files + URLs supported simultaneously
- **Role Assignment**: Select images for style influence vs structure control
- **ControlNet Integration**: Choose Canny or Depth with a live preview
- **Parameter Control**: Conditioning scale and img2img strength adjustment

#### **3. Model & Performance (Tab 3)**

- **Model Selection**: SD 1.5 (fast) or SDXL (quality) based on VRAM
- **LoRA Integration**: Load custom LoRA weights with scale control
- **Performance Tuning**: xFormers, attention slicing, VAE optimizations
- **Reliability Settings**: OOM auto-retry, batch processing, upscaling

#### **4. Intelligent Generation (Tab 4)**

- **Fusion Preview**: See the combined prompt from all inputs
- **Smart Mode Selection**: Automatically picks the best approach for the available inputs
- **Batch Processing**: Multiple images with seed control
- **Real-time Feedback**: Progress tracking and error handling

#### **5. Gallery Management (Tab 5)**

- **Advanced Filtering**: By mode, prompt content, and generation parameters
- **Visual Gallery**: 4-column grid with image previews and metadata
- **Annotation System**: Rate (1-5), tag, and add notes to images
- **Batch Operations**: Select multiple images for annotation

#### **6. Preset System (Tab 6)**

- **Configuration Capture**: Save complete generation settings
- **JSON Preview**: See the exact preset structure before saving
- **Load Management**: Browse and load existing presets
- **Reusability**: Apply saved settings to new generations

#### **7. Export Bundles (Tab 7)**

- **Complete Packages**: Images + metadata + annotations + presets
- **Reproducibility**: Full environment snapshots for exact reproduction
- **Professional Format**: ZIP bundles with a manifest and README
- **Selective Export**: Choose specific images and optionally include presets

---

## 🚀 **Quick Start Guide**

### **1. Launch the Dashboard**

```bash
# Method 1: Using the launcher (recommended)
python run_phase3_final_dashboard.py

# Method 2: Direct Streamlit launch
streamlit run src/ui/compi_phase3_final_dashboard.py --server.port 8506
```

### **2. Access the Interface**

- **URL:** `http://localhost:8506`
- **Interface:** Professional 7-tab dashboard with real-time monitoring
- **Header:** Live VRAM usage and system status

### **3. Basic Workflow**

1. **Configure Inputs**: Set up text, audio, data, emotion, and real-time feeds
2. **Add References**: Upload images and assign style/structure roles
3. **Choose Model**: Select SD 1.5 or SDXL based on your hardware
4. **Generate**: Create art with intelligent fusion of all inputs
5. **Review & Annotate**: Rate and organize results in the gallery
6. **Save & Export**: Create presets and export complete bundles

---

## 🔧 **Advanced Features**

### **🎵 Audio Processing Pipeline**

```python
# Complete audio analysis chain:
1. Upload audio file (.wav/.mp3)
2. Librosa feature extraction (tempo, energy, ZCR)
3. Whisper transcription (base model)
4. Intelligent tag generation
5. Prompt enhancement with audio context
```
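
Steps 2-3 can be sketched with real librosa/Whisper calls as follows (a minimal illustration; the dashboard's feature set is richer):

```python
import librosa
import whisper  # openai-whisper

y, sr = librosa.load("input.wav", sr=22050)                # load audio
tempo, _ = librosa.beat.beat_track(y=y, sr=sr)             # tempo (BPM)
energy = float(librosa.feature.rms(y=y).mean())            # energy (mean RMS)
zcr = float(librosa.feature.zero_crossing_rate(y).mean())  # zero-crossing rate

model = whisper.load_model("base")                         # transcription
text = model.transcribe("input.wav")["text"]
```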

### **📊 Data Integration System**

```python
# Dual data processing modes:
1. CSV Upload: Pandas analysis → statistical summary → visualization
2. Formula Mode: NumPy evaluation → pattern generation → plotting
3. Poetic summarization for prompt enhancement
```
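
A minimal sketch of both modes (the file name and formula are hypothetical):

```python
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# CSV mode: load and summarize an uploaded table
df = pd.read_csv("data.csv")      # hypothetical upload
summary = df.describe()           # statistical summary fed into the prompt

# Formula mode: evaluate a pattern over x and plot it
x = np.linspace(0, 4 * np.pi, 500)
y = np.sin(x) * np.exp(-x / 8)    # hypothetical formula
plt.plot(x, y)
plt.savefig("pattern.png")        # visualization attached to the run
```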

### **🖼️ Advanced Reference System**

```python
# Role-based reference processing:
Style References:     Used for img2img artistic influence
Structure References: Used for ControlNet composition control
Live Previews:        Real-time Canny/Depth map generation
Hybrid Modes:         CN+I2I with intelligent fallback strategies
```

### **⚡ Performance Optimization**

```python
# Multi-level optimization system:
1. xFormers: Memory-efficient attention (if available)
2. Attention Slicing: Reduce memory usage
3. VAE Slicing/Tiling: Handle large images efficiently
4. OOM Recovery: Progressive fallback (size → steps → CPU)
5. VRAM Monitoring: Real-time usage tracking
```

### **🛡️ Reliability Features**

```python
# Production-grade error handling:
1. Graceful Degradation: Features work even when components are unavailable
2. Intelligent Fallbacks: CN+I2I → two-pass approach when needed
3. OOM Recovery: Automatic retry with reduced parameters
4. Error Classification: Specific handling for different error types
```

---

## 📊 **Performance Benchmarks**

### **Generation Speed (Approximate)**

```
SD 1.5 (512x512, 20 steps):
  RTX 4090: ~15-25 seconds
  RTX 3080: ~25-35 seconds
  RTX 2080: ~45-60 seconds
  CPU:      ~5-10 minutes

SDXL (1024x1024, 20 steps):
  RTX 4090: ~30-45 seconds
  RTX 3080: ~60-90 seconds
  RTX 2080: ~2-3 minutes (with optimizations)
  CPU:      ~15-30 minutes
```

### **Memory Requirements**

```
SD 1.5 Base:       ~3.5GB VRAM
SD 1.5 + LoRA:     ~3.7GB VRAM
SD 1.5 + Upscaler: ~5.5GB VRAM

SDXL Base:       ~6.5GB VRAM
SDXL + LoRA:     ~7.0GB VRAM
SDXL + Upscaler: ~9.0GB VRAM
```

---

## 🎯 **Best Practices**

### **📝 Optimal Workflow**

1. **Start Simple**: Begin with text-only generation to test your setup
2. **Add Gradually**: Introduce multimodal inputs one at a time
3. **Monitor VRAM**: Keep usage below 80% for stability
4. **Use Presets**: Save successful configurations for reuse
5. **Export Regularly**: Create bundles of your best work

### **🤖 Model Selection**

1. **SD 1.5 for Speed**: Faster generation, lower VRAM, wide compatibility
2. **SDXL for Quality**: Higher resolution and better detail, but requires more VRAM
3. **Match Hardware**: Choose the model based on available VRAM
4. **Test First**: Verify the model works with your specific use case

### **🖼️ Reference Usage**

1. **Style References**: Use 2-4 images for artistic influence
2. **Structure Reference**: Use 1 clear image for composition control
3. **Quality Matters**: Higher-quality references produce better results
4. **Role Clarity**: Clearly separate style vs structure purposes

### **⚡ Performance Tuning**

1. **Enable xFormers**: Significant speed improvement if available
2. **Use Attention Slicing**: Always enable for memory efficiency
3. **Monitor Usage**: Watch the VRAM meter and adjust accordingly
4. **Batch Wisely**: Use smaller batches on limited hardware

---

## 🎉 **Phase 3 Complete Achievement**

**The Phase 3 Final Dashboard represents the complete realization of the CompI vision: a unified, production-grade, multimodal AI art generation platform.**

### **✅ All Phase 3 Components Integrated:**

- **✅ Phase 3.A**: Multimodal input processing
- **✅ Phase 3.B**: True fusion engine with real processing
- **✅ Phase 3.C**: Advanced references with role assignment
- **✅ Phase 3.D**: Professional workflow management
- **✅ Phase 3.E**: Performance optimization and model management

### **🚀 Key Benefits:**

- **Single Interface**: All CompI features in one unified dashboard
- **Professional Workflow**: From input to export in one seamless process
- **Production Ready**: Robust error handling and performance optimization
- **Universal Compatibility**: Works across different hardware configurations
- **Complete Integration**: All phases work together harmoniously

**CompI Phase 3 is now complete - the ultimate multimodal AI art generation platform!** 🎨✨
docs/PHASE4_DEPLOYMENT_GUIDE.md
ADDED
@@ -0,0 +1,110 @@
# Phase 4: Web Deployment Guide

## Overview

Phase 4 deploys CompI to Hugging Face Spaces with automatic CI/CD from GitHub. This makes your multimodal AI art generation platform publicly accessible.

## 4.A: Repository Preparation ✅

The following files have been added to the repo:

- `packages.txt` - System dependencies for audio processing and OpenGL
- `.gitattributes` - Git LFS configuration for model files
- `requirements.txt` - Already present with Python dependencies

## 4.B: Create Hugging Face Space

### Step 1: Create New Space

1. Go to [Hugging Face Spaces](https://huggingface.co/spaces)
2. Click "Create new Space"
3. Choose:
   - **SDK**: Streamlit
   - **Space name**: `compi-final-dashboard` (or your preference)
   - **Visibility**: Public
   - **Hardware**: CPU basic (free tier)

### Step 2: Configure Space README

Replace the default README.md in your Space with:

```markdown
---
title: CompI — Final Dashboard
emoji: 🎨
sdk: streamlit
app_file: src/ui/compi_phase3_final_dashboard.py
pinned: false
---

# CompI - Multimodal AI Art Generation Platform

The ultimate creative platform combining text, audio, data, emotion, and real-time inputs for AI art generation.

## Features

🧩 **Multimodal Inputs** - Text, Audio, Data, Emotion, Real-time feeds
🖼️ **Advanced References** - Multi-image upload with role assignment
⚙️ **Model Management** - SD 1.5/SDXL switching, LoRA integration
🖼️ **Professional Gallery** - Filtering, rating, annotation system
💾 **Preset Management** - Save/load complete configurations
📦 **Export System** - Complete bundles with metadata

## Usage

1. Configure your inputs in the "Inputs" tab
2. Upload reference images in "Advanced References"
3. Choose your model and performance settings
4. Generate with intelligent fusion of all inputs
5. Review results in the gallery and export bundles

Built with Streamlit, PyTorch, and Diffusers.
```

### Step 3: Add Secrets (Optional)

In your Space Settings → Repository secrets, add:

- `OPENWEATHER_KEY` - Your OpenWeatherMap API key for real-time weather data

**Important**: Do NOT link the Space to GitHub yet. We'll deploy via CI/CD.

## 4.C: GitHub Actions Setup

### Step 1: Add GitHub Secrets

In your GitHub repo, go to Settings → Secrets and variables → Actions:

1. **New repository secret**: `HF_TOKEN`
   - Value: Your Hugging Face **Write** token from [HF Settings → Access Tokens](https://huggingface.co/settings/tokens)

2. **New repository secret**: `HF_SPACE_ID`
   - Value: `your-username/your-space-name` (e.g., `AXRZCE/compi-final-dashboard`)

### Step 2: GitHub Actions Workflow

The workflow file `.github/workflows/deploy-to-hf-spaces.yml` will be created next.
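
At its core, the workflow pushes `main` to the Space's git remote using the two secrets above. A hedged sketch of the equivalent manual command (the actual workflow may differ):

```bash
# Push the current branch to the Hugging Face Space as its main branch.
# HF_USERNAME is your HF account name (hypothetical variable here);
# HF_TOKEN and HF_SPACE_ID are the secrets configured above.
git push --force \
  "https://${HF_USERNAME}:${HF_TOKEN}@huggingface.co/spaces/${HF_SPACE_ID}" \
  HEAD:main
```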

## 4.D: Runtime Optimization

Default settings are optimized for the free CPU tier:

- **Model**: SD 1.5 (faster than SDXL)
- **Resolution**: 512×512 (good quality/speed balance)
- **Steps**: 20-24 (sufficient for good results)
- **Batch size**: 1 (memory efficient)
- **ControlNet**: Off by default (users can enable it)

## 4.E: Deployment Workflow

1. **Development**: Work on feature branches
2. **Testing**: Test locally with `streamlit run src/ui/compi_phase3_final_dashboard.py`
3. **Deploy**: Merge to `main` → GitHub Actions automatically deploys to the HF Space
4. **Rollback**: Revert the commit on `main` if issues occur

## Next Steps

1. Complete the HF Space setup above
2. Add the GitHub secrets
3. The GitHub Actions workflow will be created automatically
4. Test deployment by pushing to `main`

Your deployed app will be available at: `https://your-username-your-space.hf.space`
docs/PHASE4_RUNTIME_OPTIMIZATION.md
ADDED
@@ -0,0 +1,111 @@
# Phase 4.D: Runtime Optimization for Free CPU Deployment

## Current Default Settings Analysis

The CompI Phase 3 Final Dashboard is already well optimized for free CPU deployment with the following defaults:

### Model Selection Defaults

- **Base Model**: SD 1.5 (v1-5) - fast and CPU-friendly
- **Generation Mode**: txt2img - the most efficient mode
- **SDXL**: Available but not the default (requires more resources)

### Image Generation Defaults

- **Width**: 512px (optimal balance of quality/speed)
- **Height**: 512px (standard square format)
- **Steps**: 30 (good quality with reasonable speed)
- **Guidance**: 7.5 (balanced prompt adherence)
- **Batch Size**: 1 (memory efficient)
- **Seed**: 0 (random; no additional computation)

### Performance Optimizations (Already Enabled)

- **xFormers**: Enabled by default for memory efficiency
- **Attention Slicing**: Enabled to reduce VRAM usage
- **VAE Slicing**: Enabled for memory optimization
- **VAE Tiling**: Available for large images
- **OOM Auto-retry**: Enabled with progressive size reduction

### Advanced Features (Disabled by Default)

- **ControlNet**: Off by default (users can enable it in the References tab)
- **LoRA**: Off by default (optional enhancement)
- **Upsampling**: Off by default (2x processing time)

## CPU-Specific Optimizations

### Automatic Fallback Sizes

The app includes intelligent OOM recovery with progressive fallbacks (sketched below):

1. Original size (e.g., 512x512, 30 steps)
2. Half size (384x384, 22 steps)
3. Safe size (384x384, 18 steps)
4. Minimal size (256x256, 14 steps)
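
A minimal sketch of that fallback ladder (the rungs mirror the list above; the app's actual retry logic may differ):

```python
import torch

FALLBACKS = [
    (512, 512, 30),  # original request
    (384, 384, 22),  # half size
    (384, 384, 18),  # safe size
    (256, 256, 14),  # minimal size
]

def generate_with_fallback(pipe, prompt):
    """Try each (width, height, steps) rung until one fits in memory."""
    for width, height, steps in FALLBACKS:
        try:
            return pipe(prompt, width=width, height=height,
                        num_inference_steps=steps).images[0]
        except RuntimeError as err:  # CUDA OOM surfaces as RuntimeError
            if "out of memory" not in str(err).lower():
                raise
            if torch.cuda.is_available():
                torch.cuda.empty_cache()  # free cache before the next attempt
    raise RuntimeError("All fallback sizes failed")
```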

### Fast Preset Available

Users can click "🧼 Reset to defaults" or use the "Fast" preset:

- SD 1.5 model
- 512x512 resolution
- 30 steps
- All optimizations enabled

## Recommended Settings for Free Tier

### For Best Performance

```
Model: SD 1.5 (v1-5)
Resolution: 512x512 or 448x448
Steps: 20-25
Guidance: 7.5
Batch: 1
ControlNet: Off
LoRA: Off
Upsampling: Off
```

### For Higher Quality (Slower)

```
Model: SD 1.5 (v1-5)
Resolution: 768x512 or 512x768
Steps: 30-35
Guidance: 7.0-8.0
Batch: 1
ControlNet: Optional
LoRA: Optional (if available)
```

## User Guidance in UI

The app provides helpful guidance:

- VRAM safety indicators with color-coded warnings
- Quick tips in expandable sections
- Preset buttons for common use cases
- Performance optimization toggles with explanations

## Deployment Considerations

### Hugging Face Spaces Free Tier

- **CPU**: 2 vCPUs
- **RAM**: 16GB
- **Storage**: 50GB
- **Timeout**: 48 hours idle

### Expected Performance

- **512x512, 20 steps**: ~30-60 seconds per image
- **768x512, 30 steps**: ~60-120 seconds per image
- **With ControlNet**: +50-100% generation time
- **With LoRA**: +10-20% generation time

## Monitoring and Optimization

The dashboard includes:

- Real-time VRAM monitoring (when available)
- Generation time tracking
- Automatic error recovery
- Progressive quality degradation under resource constraints

## Conclusion

The CompI Phase 3 Final Dashboard is already optimized for free CPU deployment with:

- Sensible defaults for a speed/quality balance
- Automatic fallback mechanisms
- User-friendly performance controls
- Clear guidance for optimization

No code changes are required for Phase 4.D - the existing defaults are optimal for free-tier deployment.
docs/PROJECT_STRUCTURE.md
ADDED
@@ -0,0 +1,158 @@
# CompI Project Structure

This document outlines the organized structure of the CompI (Compositional Intelligence) project.

## 📁 Directory Structure

```
Project CompI/
├── 📁 src/                                     # Source code (organized modules)
│   ├── 📁 generators/                          # Image generation modules
│   │   ├── __init__.py                         # Module initialization
│   │   ├── compi_phase1_text2image.py          # Basic text-to-image
│   │   ├── compi_phase1_advanced.py            # Advanced generation
│   │   ├── compi_phase1b_styled_generation.py  # Style conditioning
│   │   ├── compi_phase1b_advanced_styling.py   # Advanced styling
│   │   ├── compi_phase1d_evaluate_quality.py   # Quality evaluation (Streamlit)
│   │   ├── compi_phase1d_cli_evaluation.py     # Quality evaluation (CLI)
│   │   ├── compi_phase1e_dataset_prep.py       # LoRA dataset preparation
│   │   ├── compi_phase1e_lora_training.py      # LoRA fine-tuning
│   │   ├── compi_phase1e_style_generation.py   # Personal style generation
│   │   └── compi_phase1e_style_manager.py      # LoRA style management
│   ├── 📁 models/                              # Model implementations (future)
│   ├── 📁 utils/                               # Utility functions
│   │   ├── __init__.py
│   │   ├── logging_utils.py
│   │   └── file_utils.py
│   ├── 📁 data/                                # Data processing modules (future)
│   ├── 📁 ui/                                  # User interface components (future)
│   ├── config.py                               # Project configuration
│   ├── setup_env.py                            # Environment setup script
│   └── test_setup.py                           # Environment testing
├── 📁 notebooks/                               # Jupyter notebooks
│   └── 01_getting_started.ipynb                # Tutorial notebook
├── 📁 data/                                    # Dataset storage
├── 📁 outputs/                                 # Generated content
│   ├── images/                                 # Generated images
│   └── metadata/                               # Generation metadata
├── 📁 tests/                                   # Unit tests (future)
├── 🐍 run_basic_generation.py                  # Convenience: Basic generation
├── 🐍 run_advanced_generation.py               # Convenience: Advanced generation
├── 🐍 run_styled_generation.py                 # Convenience: Style conditioning
├── 🐍 run_advanced_styling.py                  # Convenience: Advanced styling
├── 🐍 run_evaluation.py                        # Convenience: Quality evaluation
├── 🐍 run_lora_training.py                     # Convenience: LoRA training
├── 🐍 run_style_generation.py                  # Convenience: Personal style generation
├── 📄 requirements.txt                         # Python dependencies
├── 📄 development.md                           # Development roadmap
├── 📄 PHASE1_USAGE.md                          # Phase 1 usage guide
├── 📄 PROJECT_STRUCTURE.md                     # This file
├── 📄 .gitignore                               # Git ignore rules
└── 📄 README.md                                # Project overview
```

## 🚀 Usage Patterns

### Convenience Scripts (Recommended)

Run from the project root for easy access:

```bash
# Basic text-to-image generation
python run_basic_generation.py "prompt here"

# Advanced generation with options
python run_advanced_generation.py "prompt" --negative "unwanted" --steps 50

# Interactive style selection
python run_styled_generation.py

# Advanced style conditioning
python run_advanced_styling.py "prompt" --style "oil painting" --mood "dramatic"

# Quality evaluation interface
python run_evaluation.py

# LoRA personal style training
python run_lora_training.py --dataset-dir datasets/my_style

# Generate with personal style
python run_style_generation.py --lora-path lora_models/my_style/checkpoint-1000 "prompt"
```

### Direct Module Access

Run generators directly from their organized location:

```bash
# Direct access to generators
python src/generators/compi_phase1_text2image.py "prompt"
python src/generators/compi_phase1b_advanced_styling.py --list-styles

# Environment setup and testing
python src/setup_env.py
python src/test_setup.py
```

## 🎯 Benefits of This Organization

### 1. **Clean Separation of Concerns**

- **`src/generators/`** - All image generation logic
- **`src/utils/`** - Reusable utility functions
- **`src/`** - Core project modules and configuration
- **Root level** - Convenience scripts and documentation

### 2. **Professional Python Structure**

- Proper module organization with `__init__.py` files
- Clear import paths and dependencies
- Scalable architecture for future expansion

### 3. **Easy Access**

- Convenience scripts provide simple access from the project root
- Direct module access for advanced users
- Maintains backward compatibility

### 4. **Future-Ready**

- Organized structure ready for Phase 2+ implementations
- Clear places for audio processing, UI components, etc.
- Modular design supports easy testing and maintenance

## 🔧 Development Guidelines

### Adding New Generators

1. Create the new generator in `src/generators/`
2. Add imports to `src/generators/__init__.py` (see the sketch below)
3. Create a convenience script in the project root if needed
4. Update the documentation
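
A minimal sketch of step 2, registering a hypothetical new module alongside the existing entries:

```python
# src/generators/__init__.py
from .compi_phase5_new_generator import *  # hypothetical new module

__all__.append("compi_phase5_new_generator")
```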

### Adding New Utilities

1. Add utility functions to the appropriate module in `src/utils/`
2. Update the `src/utils/__init__.py` imports
3. Import them in generators as needed

### Testing

1. Add tests to the `tests/` directory
2. Use `src/test_setup.py` for environment verification
3. Test both the convenience scripts and direct module access

## 📚 Documentation

- **README.md** - Project overview and quick start
- **development.md** - Comprehensive development roadmap
- **PHASE1_USAGE.md** - Detailed Phase 1 usage guide
- **PROJECT_STRUCTURE.md** - This organizational guide

This structure provides a solid foundation for the CompI project's continued development through all planned phases.
docs/README.md
ADDED
@@ -0,0 +1,38 @@
# CompI Documentation Index

Welcome to the CompI docs. All phase guides and the project structure are collected here.

## Quick links

- Project Structure — PROJECT_STRUCTURE.md
- Phase 1
  - Usage — PHASE1_USAGE.md
  - Evaluation (1.D) — PHASE1D_EVALUATION_GUIDE.md
  - LoRA (1.E) — PHASE1E_LORA_GUIDE.md
- Phase 2
  - Audio to Image (2.A) — PHASE2A_AUDIO_TO_IMAGE_GUIDE.md
  - Data to Image (2.B) — PHASE2B_DATA_TO_IMAGE_GUIDE.md
  - Emotion to Image (2.C) — PHASE2C_EMOTION_TO_IMAGE_GUIDE.md
  - Real-time Data (2.D) — PHASE2D_REALTIME_DATA_TO_IMAGE_GUIDE.md
  - Style Reference (2.E) — PHASE2E_STYLE_REFERENCE_GUIDE.md
- Phase 3
  - Final Dashboard — PHASE3_FINAL_DASHBOARD_GUIDE.md
  - Performance (3.E) — PHASE3E_PERFORMANCE_GUIDE.md
- Phase 4
  - Deployment Guide — PHASE4_DEPLOYMENT_GUIDE.md
  - Runtime Optimization — PHASE4_RUNTIME_OPTIMIZATION.md

## Getting started

- Launch the integrated Phase 3 dashboard:
  - python run_phase3_final_dashboard.py
  - Or: streamlit run src/ui/compi_phase3_final_dashboard.py --server.port 8506
- Generated images and logs are stored in outputs/
- Presets live in presets/; export bundles are saved to exports/

## Notes

- Some guides describe optional workflows (e.g., dataset preparation and LoRA training). Those features remain available via the src/generators/* scripts but are not required for using the Phase 3 Final Dashboard.
- If you reorganize folders, ensure src/config.py and the Streamlit app constants continue to point to valid directories.
packages.txt
ADDED
@@ -0,0 +1,3 @@
ffmpeg
libsndfile1
libgl1
requirements.txt
ADDED
@@ -0,0 +1,73 @@
# Core Deep Learning
torch>=2.1.0
torchvision>=0.16.0
torchaudio>=2.1.0
transformers>=4.35.0
diffusers>=0.24.0
accelerate>=0.24.0

# Audio Processing
librosa>=0.10.0
soundfile>=0.12.0
audioread>=3.0.0

# Data Processing & Analysis
numpy>=1.24.0
pandas>=2.0.0
scipy>=1.10.0
scikit-learn>=1.3.0

# Computer Vision
opencv-python>=4.8.0
opencv-python-headless>=4.8.0
Pillow>=10.0.0

# Natural Language Processing
textblob>=0.17.0
nltk>=3.8.0
spacy>=3.6.0

# Visualization
matplotlib>=3.7.0
seaborn>=0.12.0
plotly>=5.15.0

# UI Frameworks
streamlit>=1.28.0
gradio>=4.0.0

# Utilities
requests>=2.31.0
feedparser>=6.0.0
tqdm>=4.65.0
python-dotenv>=1.0.0

# Development & Testing
pytest>=7.4.0
jupyter>=1.0.0
jupyterlab>=4.0.0
ipywidgets>=8.0.0

# Image Analysis & Evaluation
imagehash>=4.3.0

# LoRA Fine-tuning & Personal Style Training
peft>=0.7.0
datasets>=2.14.0
bitsandbytes>=0.41.0

# Audio-to-Text Processing (Phase 2.A)
openai-whisper>=20231117

# Optional: Advanced ML Libraries
# Uncomment as needed for advanced features
open-clip-torch>=2.24.0
controlnet-aux>=0.4.0
xformers>=0.0.22  # For memory-efficient attention (GPU recommended)
# wandb>=0.16.0  # Experiment tracking
safetensors>=0.4.0  # Safe model loading

# Optional: Cloud/API Integration
# boto3>=1.34.0  # AWS SDK
# google-cloud-storage>=2.10.0  # Google Cloud Platform
# openai>=1.0.0  # OpenAI API (updated to v1+)
run_phase3_final_dashboard.py
ADDED
@@ -0,0 +1,299 @@
#!/usr/bin/env python3
"""
CompI Phase 3 Final Dashboard Launcher

Launch the complete Phase 3 integrated dashboard that combines ALL CompI features:

Phase 3.A/3.B: True multimodal fusion with real processing
- Real audio transcription and analysis
- Actual data processing and visualization
- Sentiment analysis and emotion detection
- Live real-time data feeds (weather, news)

Phase 3.C: Advanced references with role assignment
- Multi-image upload and URL support
- Style vs structure role assignment
- Live ControlNet previews (Canny/Depth)
- Hybrid CN+I2I generation modes

Phase 3.D: Professional workflow management
- Gallery with advanced filtering
- Rating, tagging, and annotation system
- Preset save/load functionality
- Complete export bundles with metadata

Phase 3.E: Performance management and model switching
- SD 1.5 ↔ SDXL model switching
- LoRA integration with scale control
- Performance optimizations (xFormers, attention slicing, VAE)
- VRAM monitoring and OOM auto-retry
- Optional latent upscaling

Usage:
    python run_phase3_final_dashboard.py

    or

    streamlit run src/ui/compi_phase3_final_dashboard.py --server.port 8506
"""

import os
import sys
import subprocess
from pathlib import Path


def check_dependencies():
    """Check for required dependencies"""
    print("📦 Checking dependencies...")

    # Keys are importable module names; note Pillow is imported as "PIL",
    # not "pillow" (a "pillow" key would always be reported as missing).
    required_packages = {
        "torch": "PyTorch",
        "diffusers": "Diffusers",
        "transformers": "Transformers",
        "accelerate": "Accelerate",
        "streamlit": "Streamlit",
        "PIL": "Pillow (PIL)",
        "numpy": "NumPy",
        "pandas": "Pandas",
        "librosa": "Librosa (audio processing)",
        "matplotlib": "Matplotlib (plotting)",
        "requests": "Requests (HTTP)",
        "feedparser": "FeedParser (RSS feeds)",
        "textblob": "TextBlob (sentiment analysis)"
    }

    # Special check for OpenCV (accept either opencv-python or opencv-python-headless)
    opencv_available = False
    try:
        import cv2
        opencv_available = True
        required_packages["cv2"] = "OpenCV (image processing)"
    except ImportError:
        pass

    missing_packages = []
    available_packages = []

    for package, name in required_packages.items():
        try:
            __import__(package.replace("-", "_"))
            available_packages.append(name)
        except ImportError:
            if package != "cv2":  # cv2 already checked above
                missing_packages.append(package)

    # Add opencv to missing if not available
    if not opencv_available:
        missing_packages.append("opencv-python")

    print(f"✅ Available: {', '.join(available_packages)}")

    if missing_packages:
        print(f"❌ Missing: {', '.join(missing_packages)}")
        return False

    return True


def check_optional_features():
    """Check for optional features"""
    print("\n🔍 Checking optional features...")

    # Check Whisper
    try:
        import whisper
        print("✅ Whisper available for audio transcription")
    except ImportError:
        print("⚠️ Whisper not available (will be installed on first use)")

    # Check SDXL availability
    try:
        from diffusers import StableDiffusionXLPipeline
        print("✅ SDXL support available")
    except ImportError:
        print("⚠️ SDXL not available (requires newer diffusers)")

    # Check ControlNet availability
    try:
        from diffusers import StableDiffusionControlNetPipeline
        print("✅ ControlNet available")
    except ImportError:
        print("⚠️ ControlNet not available")

    # Check upscaler availability
    try:
        from diffusers import StableDiffusionLatentUpscalePipeline
        print("✅ Latent Upscaler available")
    except ImportError:
        print("⚠️ Latent Upscaler not available")

    # Check xFormers
    try:
        import xformers
        print("✅ xFormers available for memory optimization")
    except ImportError:
        print("⚠️ xFormers not available (optional performance boost)")


def check_gpu_setup():
    """Check GPU setup and provide recommendations"""
    print("\n🔍 Checking GPU setup...")

    try:
        import torch

        if torch.cuda.is_available():
            gpu_count = torch.cuda.device_count()
            gpu_name = torch.cuda.get_device_name(0)
            total_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)

            print(f"✅ CUDA available: {gpu_count} GPU(s)")
            print(f"   Primary GPU: {gpu_name}")
            print(f"   VRAM: {total_memory:.1f} GB")

            if total_memory >= 12.0:
                print("✅ Excellent VRAM for all features including SDXL")
            elif total_memory >= 8.0:
                print("✅ Good VRAM for SDXL and most features")
            elif total_memory >= 6.0:
                print("✅ Sufficient VRAM for SD 1.5 and most features")
                print("⚠️ SDXL may require optimizations")
            elif total_memory >= 4.0:
                print("✅ Minimum VRAM for SD 1.5")
                print("⚠️ Use aggressive optimizations for best performance")
            else:
                print("⚠️ Limited VRAM - consider CPU mode or cloud GPU")

            return True
        else:
            print("⚠️ CUDA not available - will use CPU mode")
            print("💡 CPU mode is slower but still functional")
            return False

    except ImportError:
        print("❌ PyTorch not found")
        return False


def install_missing_dependencies():
    """Install missing dependencies"""
    print("\n📦 Installing missing dependencies...")

    try:
        # Core dependencies
        core_packages = [
            "torch", "torchvision", "torchaudio",
            "diffusers>=0.21.0", "transformers", "accelerate",
            "streamlit", "pillow", "numpy", "pandas",
            "librosa", "opencv-python", "matplotlib",
            "requests", "feedparser", "textblob"
        ]

        print("Installing core packages...")
        subprocess.check_call([
            sys.executable, "-m", "pip", "install"
        ] + core_packages)

        print("✅ Core dependencies installed")

        # Optional performance packages (skip xformers due to compatibility issues)
        print("⚠️ Skipping xFormers installation (compatibility issues with current PyTorch version)")

        return True

    except subprocess.CalledProcessError as e:
        print(f"❌ Installation failed: {e}")
        return False


def main():
    """Launch Phase 3 Final Dashboard"""

    print("🧪 CompI Phase 3 Final Dashboard")
    print("=" * 80)
    print()
    print("🎯 Complete Phase 3 Integration (3.A → 3.E):")
    print("   • 🧩 Multimodal Inputs: Text, Audio, Data, Emotion, Real-time")
    print("   • 🖼️ Advanced References: Role assignment, ControlNet, live previews")
    print("   • ⚙️ Model & Performance: SD 1.5/SDXL, LoRA, VRAM monitoring")
    print("   • 🎛️ Intelligent Generation: Hybrid modes, OOM recovery")
    print("   • 🖼️ Professional Gallery: Filtering, rating, annotation")
    print("   • 💾 Preset Management: Save/load configurations")
    print("   • 📦 Export System: Complete bundles with metadata")
    print()

    # Check if the UI file exists
    ui_file = Path("src/ui/compi_phase3_final_dashboard.py")
    if not ui_file.exists():
        print(f"❌ Error: {ui_file} not found!")
        print("Make sure you're running this from the project root directory.")
        return 1

    # Check dependencies
    if not check_dependencies():
        print("\n❌ Missing dependencies detected.")
        install = input("Install missing dependencies? (y/n): ").lower().strip()

        if install == 'y':
            if not install_missing_dependencies():
                print("❌ Failed to install dependencies")
                return 1
        else:
            print("❌ Cannot proceed without required dependencies")
            return 1

    # Check GPU setup
    has_gpu = check_gpu_setup()

    # Check optional features
    check_optional_features()

    print()
    print("🚀 Launching Phase 3 Final Dashboard...")
    print("📍 Access at: http://localhost:8506")
    print()

    if has_gpu:
        print("💡 GPU Tips:")
        print("   • Monitor VRAM usage in the top metrics bar")
        print("   • Use performance optimizations in Model & Performance tab")
        print("   • Enable OOM auto-retry for reliability")
        print("   • Try SDXL for higher quality (requires 8+ GB VRAM)")
    else:
        print("💡 CPU Tips:")
        print("   • Generation will be slower but still functional")
        print("   • Use smaller image sizes (512x512 or less)")
        print("   • Reduce inference steps for faster generation")
        print("   • Stick to SD 1.5 model for best performance")

    print()
    print("🎨 Getting Started:")
    print("   1. 🧩 Configure multimodal inputs (audio, data, emotion, real-time)")
    print("   2. 🖼️ Upload reference images and assign roles (style vs structure)")
    print("   3. ⚙️ Choose model and optimize performance settings")
    print("   4. 🎛️ Generate with intelligent fusion of all inputs")
    print("   5. 🖼️ Review results in gallery and add annotations")
    print("   6. 💾 Save presets for reuse")
    print("   7. 📦 Export complete bundles with metadata")
    print()

    # Launch Streamlit
    try:
        cmd = [
            sys.executable, "-m", "streamlit", "run",
            str(ui_file),
            "--server.port", "8506",
            "--server.headless", "true",
            "--browser.gatherUsageStats", "false"
        ]

        subprocess.run(cmd)

    except KeyboardInterrupt:
        print("\n👋 Phase 3 Final Dashboard stopped by user")
        return 0
    except Exception as e:
        print(f"❌ Error launching Streamlit: {e}")
        return 1

    return 0


if __name__ == "__main__":
    exit_code = main()
    sys.exit(exit_code)
src/__init__.py
ADDED
@@ -0,0 +1,7 @@
"""
|
| 2 |
+
CompI - Compositional Intelligence Project
|
| 3 |
+
A multi-modal AI system for creative content generation.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
__version__ = "0.1.0"
|
| 7 |
+
__author__ = "CompI Development Team"
|
src/config.py
ADDED
@@ -0,0 +1,42 @@
"""
|
| 2 |
+
Configuration settings for CompI project.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
# Project paths
|
| 9 |
+
PROJECT_ROOT = Path(__file__).parent.parent
|
| 10 |
+
DATA_DIR = PROJECT_ROOT / "data"
|
| 11 |
+
OUTPUTS_DIR = PROJECT_ROOT / "outputs"
|
| 12 |
+
MODELS_DIR = PROJECT_ROOT / "models"
|
| 13 |
+
|
| 14 |
+
# Create directories if they don't exist
|
| 15 |
+
for dir_path in [DATA_DIR, OUTPUTS_DIR, MODELS_DIR]:
|
| 16 |
+
dir_path.mkdir(exist_ok=True)
|
| 17 |
+
|
| 18 |
+
# Model configurations
|
| 19 |
+
STABLE_DIFFUSION_MODEL = "runwayml/stable-diffusion-v1-5"
|
| 20 |
+
STABLE_DIFFUSION_IMG2IMG_MODEL = "runwayml/stable-diffusion-v1-5"
|
| 21 |
+
EMOTION_MODEL = "j-hartmann/emotion-english-distilroberta-base"
|
| 22 |
+
SENTIMENT_MODEL = "cardiffnlp/twitter-roberta-base-sentiment-latest"
|
| 23 |
+
|
| 24 |
+
# Generation settings
|
| 25 |
+
DEFAULT_IMAGE_SIZE = (512, 512)
|
| 26 |
+
DEFAULT_INFERENCE_STEPS = 20
|
| 27 |
+
DEFAULT_GUIDANCE_SCALE = 7.5
|
| 28 |
+
|
| 29 |
+
# Audio settings
|
| 30 |
+
SAMPLE_RATE = 22050
|
| 31 |
+
AUDIO_DURATION = 10 # seconds
|
| 32 |
+
|
| 33 |
+
# Device settings
|
| 34 |
+
DEVICE = "cuda" if os.getenv("CUDA_AVAILABLE", "false").lower() == "true" else "cpu"
|
| 35 |
+
|
| 36 |
+
# API keys (load from environment)
|
| 37 |
+
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 38 |
+
HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
|
| 39 |
+
|
| 40 |
+
# Logging
|
| 41 |
+
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
|
| 42 |
+
LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
src/generators/__init__.py
ADDED
@@ -0,0 +1,26 @@
"""
CompI Image Generators
Text-to-image generation modules for the CompI platform.
"""

from .compi_phase1_text2image import *
from .compi_phase1_advanced import *
from .compi_phase1b_styled_generation import *
from .compi_phase1b_advanced_styling import *
from .compi_phase2a_audio_to_image import *
from .compi_phase2b_data_to_image import *
from .compi_phase2c_emotion_to_image import *
from .compi_phase2d_realtime_to_image import *
from .compi_phase2e_refimg_to_image import *

__all__ = [
    "compi_phase1_text2image",
    "compi_phase1_advanced",
    "compi_phase1b_styled_generation",
    "compi_phase1b_advanced_styling",
    "compi_phase2a_audio_to_image",
    "compi_phase2b_data_to_image",
    "compi_phase2c_emotion_to_image",
    "compi_phase2d_realtime_to_image",
    "compi_phase2e_refimg_to_image"
]
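A note on import cost: because this __init__ wildcard-imports every generator module, importing anything through the package executes all of them, including any module-level model loading those files perform. A minimal sketch of the implication:

# Sketch: either form runs the whole package __init__ (and thus every
# generator module's import-time code) before the name is bound.
from src.generators import compi_phase1_advanced  # still executes all wildcard imports above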
src/generators/compi_phase1_advanced.py
ADDED
@@ -0,0 +1,230 @@
# compi_phase1_advanced.py
# Enhanced text-to-image generation with batch processing, negative prompts, and style controls

import os
import sys
import torch
import argparse
from datetime import datetime
from diffusers import StableDiffusionPipeline
from PIL import Image
import json

# Add project root to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))

# ------------------ 1. SETUP AND ARGUMENT PARSING ------------------

def setup_args():
    parser = argparse.ArgumentParser(description="CompI Phase 1: Advanced Text-to-Image Generation")
    parser.add_argument("prompt", nargs="*", help="Text prompt for image generation")
    parser.add_argument("--negative", "-n", default="", help="Negative prompt (what to avoid)")
    parser.add_argument("--steps", "-s", type=int, default=30, help="Number of inference steps (default: 30)")
    parser.add_argument("--guidance", "-g", type=float, default=7.5, help="Guidance scale (default: 7.5)")
    parser.add_argument("--seed", type=int, default=None, help="Random seed for reproducibility")
    parser.add_argument("--batch", "-b", type=int, default=1, help="Number of images to generate")
    parser.add_argument("--width", "-w", type=int, default=512, help="Image width (default: 512)")
    parser.add_argument("--height", type=int, default=512, help="Image height (default: 512)")
    parser.add_argument("--model", "-m", default="runwayml/stable-diffusion-v1-5", help="Model to use")
    parser.add_argument("--output", "-o", default="outputs", help="Output directory")
    parser.add_argument("--interactive", "-i", action="store_true", help="Interactive mode")
    return parser.parse_args()

# Check for GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Logging function
def log(msg):
    now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
    print(f"{now} {msg}")

# ------------------ 2. MODEL LOADING ------------------

def load_model(model_name):
    log(f"Loading model: {model_name}")

    def dummy_safety_checker(images, **kwargs):
        return images, [False] * len(images)

    try:
        pipe = StableDiffusionPipeline.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            safety_checker=dummy_safety_checker,
        )
        pipe = pipe.to(device)

        # Memory optimizations
        pipe.enable_attention_slicing()
        # Note: enable_memory_efficient_attention() is deprecated in newer versions

        log("Model loaded successfully")
        return pipe
    except Exception as e:
        log(f"Error loading model: {e}")
        sys.exit(1)

# ------------------ 3. GENERATION FUNCTION ------------------

def generate_image(pipe, prompt, negative_prompt="", **kwargs):
    """Generate a single image with given parameters"""

    # Set up generator
    seed = kwargs.get('seed', torch.seed())
    if device == "cuda":
        generator = torch.Generator(device).manual_seed(seed)
    else:
        generator = torch.manual_seed(seed)

    # Generation parameters
    params = {
        'prompt': prompt,
        'negative_prompt': negative_prompt if negative_prompt else None,
        'height': kwargs.get('height', 512),
        'width': kwargs.get('width', 512),
        'num_inference_steps': kwargs.get('steps', 30),
        'guidance_scale': kwargs.get('guidance', 7.5),
        'generator': generator,
    }

    log(f"Generating: '{prompt[:50]}...' (seed: {seed})")

    with torch.autocast(device) if device == "cuda" else torch.no_grad():
        result = pipe(**params)
        return result.images[0], seed

# ------------------ 4. SAVE FUNCTION ------------------

def save_image(image, prompt, seed, output_dir, metadata=None):
    """Save image with descriptive filename and metadata"""
    os.makedirs(output_dir, exist_ok=True)

    # Create filename
    prompt_slug = "_".join(prompt.lower().split()[:6])
    prompt_slug = "".join(c for c in prompt_slug if c.isalnum() or c in "_-")[:40]
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{prompt_slug}_{timestamp}_seed{seed}.png"
    filepath = os.path.join(output_dir, filename)

    # Save image
    image.save(filepath)

    # Save metadata
    if metadata:
        metadata_file = filepath.replace('.png', '_metadata.json')
        with open(metadata_file, 'w') as f:
            json.dump(metadata, f, indent=2)

    log(f"Saved: {filepath}")
    return filepath

# ------------------ 5. INTERACTIVE MODE ------------------

def interactive_mode(pipe, output_dir):
    """Interactive prompt mode for experimentation"""
    log("Entering interactive mode. Type 'quit' to exit.")

    while True:
        try:
            prompt = input("\n🎨 Enter prompt: ").strip()
            if prompt.lower() in ['quit', 'exit', 'q']:
                break

            if not prompt:
                continue

            negative = input("❌ Negative prompt (optional): ").strip()

            # Quick settings
            print("⚙️ Quick settings (press Enter for defaults):")
            steps = input(" Steps (30): ").strip()
            steps = int(steps) if steps else 30

            guidance = input(" Guidance (7.5): ").strip()
            guidance = float(guidance) if guidance else 7.5

            # Generate
            image, seed = generate_image(
                pipe, prompt, negative,
                steps=steps, guidance=guidance
            )

            # Save with metadata
            metadata = {
                'prompt': prompt,
                'negative_prompt': negative,
                'steps': steps,
                'guidance_scale': guidance,
                'seed': seed,
                'timestamp': datetime.now().isoformat()
            }

            save_image(image, prompt, seed, output_dir, metadata)

        except KeyboardInterrupt:
            print("\n👋 Goodbye!")
            break
        except Exception as e:
            log(f"Error: {e}")

# ------------------ 6. MAIN FUNCTION ------------------

def main():
    args = setup_args()

    # Load model
    pipe = load_model(args.model)

    # Interactive mode
    if args.interactive:
        interactive_mode(pipe, args.output)
        return

    # Get prompt
    if args.prompt:
        prompt = " ".join(args.prompt)
    else:
        prompt = input("Enter your prompt: ").strip()

    if not prompt:
        log("No prompt provided. Use --interactive for interactive mode.")
        return

    # Generate batch
    log(f"Generating {args.batch} image(s)")

    for i in range(args.batch):
        try:
            # Use provided seed or generate a random one
            # (compared against None so an explicit seed of 0 is honored)
            seed = args.seed if args.seed is not None else torch.seed()

            image, actual_seed = generate_image(
                pipe, prompt, args.negative,
                seed=seed, steps=args.steps, guidance=args.guidance,
                width=args.width, height=args.height
            )

            # Prepare metadata
            metadata = {
                'prompt': prompt,
                'negative_prompt': args.negative,
                'steps': args.steps,
                'guidance_scale': args.guidance,
                'seed': actual_seed,
                'width': args.width,
                'height': args.height,
                'model': args.model,
                'batch_index': i + 1,
                'timestamp': datetime.now().isoformat()
            }

            save_image(image, prompt, actual_seed, args.output, metadata)

        except Exception as e:
            log(f"Error generating image {i+1}: {e}")

    log("Generation complete!")

if __name__ == "__main__":
    main()
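A hedged programmatic sketch (assumes the repository root as the working directory and a working diffusers install): loading the module by file path sidesteps the package __init__'s eager wildcard imports noted above, some of which load models at import time.

# Sketch: use the loader/generator from this file for programmatic batches.
import importlib.util

spec = importlib.util.spec_from_file_location(
    "compi_phase1_advanced", "src/generators/compi_phase1_advanced.py")
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)

pipe = mod.load_model("runwayml/stable-diffusion-v1-5")
image, seed = mod.generate_image(pipe, "a lighthouse at dusk, oil painting",
                                 negative_prompt="blurry, low quality", steps=30)
mod.save_image(image, "a lighthouse at dusk, oil painting", seed, "outputs")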
src/generators/compi_phase1_text2image.py
ADDED
@@ -0,0 +1,117 @@
# compi_phase1_text2image.py

import os
import sys
import torch
from datetime import datetime
from diffusers import StableDiffusionPipeline
from PIL import Image

# Add project root to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))

# ------------------ 1. SETUP AND CHECKS ------------------

# Check for GPU
if torch.cuda.is_available():
    device = "cuda"
    print("CUDA GPU detected. Running on GPU for best performance.")
else:
    device = "cpu"
    print("No CUDA GPU detected. Running on CPU. Generation will be slow.")

# Set up output directory
OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', "outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Logging function
def log(msg):
    now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
    print(f"{now} {msg}")

# ------------------ 2. LOAD MODEL ------------------

MODEL_NAME = "runwayml/stable-diffusion-v1-5"
log(f"Loading model: {MODEL_NAME} (this may take a minute on first run)")

# Optionally, disable the safety checker for pure creative exploration
def dummy_safety_checker(images, **kwargs):
    return images, [False] * len(images)

try:
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        safety_checker=dummy_safety_checker,  # Remove for production!
    )
except Exception as e:
    log(f"Error loading model: {e}")
    sys.exit(1)

pipe = pipe.to(device)
pipe.enable_attention_slicing()  # Reduce VRAM use

log("Model loaded successfully.")

# ------------------ 3. PROMPT HANDLING ------------------

def main():
    """Main function for command-line execution"""
    if len(sys.argv) > 1:
        prompt = " ".join(sys.argv[1:])
        log(f"Prompt taken from command line: {prompt}")
    else:
        prompt = input("Enter your prompt (e.g. 'A magical forest, digital art'): ").strip()
        log(f"Prompt entered: {prompt}")

    if not prompt:
        log("No prompt provided. Exiting.")
        sys.exit(0)

    # ------------------ 4. GENERATION PARAMETERS ------------------

    SEED = torch.seed()  # You can use a fixed seed for reproducibility, e.g. 1234
    # Seed the generator with SEED on both devices so the logged seed matches the output
    generator = torch.manual_seed(SEED) if device == "cpu" else torch.Generator(device).manual_seed(SEED)

    num_inference_steps = 30  # More steps = better quality, slower (default 50)
    guidance_scale = 7.5      # Higher = follow prompt more strictly

    # Output image size (SDv1.5 default 512x512)
    height = 512
    width = 512

    # ------------------ 5. IMAGE GENERATION ------------------

    log(f"Generating image for prompt: {prompt}")
    log(f"Params: steps={num_inference_steps}, guidance_scale={guidance_scale}, seed={SEED}")

    with torch.autocast(device) if device == "cuda" else torch.no_grad():
        result = pipe(
            prompt,
            height=height,
            width=width,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator,
        )

    image: Image.Image = result.images[0]

    # ------------------ 6. SAVE OUTPUT ------------------

    # Filename: prompt short, datetime, seed
    prompt_slug = "_".join(prompt.lower().split()[:6])
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{prompt_slug[:40]}_{timestamp}_seed{SEED}.png"
    filepath = os.path.join(OUTPUT_DIR, filename)
    image.save(filepath)
    log(f"Image saved to {filepath}")

    # Optionally, show image (uncomment next line if running locally)
    # image.show()

    # Log end
    log("Generation complete.")

if __name__ == "__main__":
    main()
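For reproducible runs, pin the seed instead of drawing a fresh one, as the comment in section 4 suggests; a minimal drop-in sketch for those two lines (device is the module-level string):

# Sketch: fixed seed for reproducibility (replaces the random torch.seed() above).
SEED = 1234
generator = (torch.manual_seed(SEED) if device == "cpu"
             else torch.Generator(device).manual_seed(SEED))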
src/generators/compi_phase1b_advanced_styling.py
ADDED
@@ -0,0 +1,338 @@
# compi_phase1b_advanced_styling.py
# Advanced style conditioning with negative prompts, quality settings, and enhanced prompt engineering

import os
import sys
import torch
import json
import argparse
from datetime import datetime
from diffusers import StableDiffusionPipeline
from PIL import Image

# Add project root to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))

# -------- 1. SETUP AND ARGUMENT PARSING --------

def setup_args():
    parser = argparse.ArgumentParser(description="CompI Phase 1.B: Advanced Style Conditioning")
    parser.add_argument("prompt", nargs="*", help="Main scene/subject description")
    parser.add_argument("--style", "-s", help="Art style (or number from list)")
    parser.add_argument("--mood", "-m", help="Mood/atmosphere (or number from list)")
    parser.add_argument("--variations", "-v", type=int, default=1, help="Number of variations")
    parser.add_argument("--quality", "-q", choices=["draft", "standard", "high"], default="standard", help="Quality preset")
    parser.add_argument("--negative", "-n", help="Negative prompt")
    parser.add_argument("--interactive", "-i", action="store_true", help="Interactive mode")
    parser.add_argument("--list-styles", action="store_true", help="List available styles and exit")
    parser.add_argument("--list-moods", action="store_true", help="List available moods and exit")
    return parser.parse_args()

# Device setup
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', "outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

def log(msg):
    now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
    print(f"{now} {msg}")

# -------- 2. STYLE AND MOOD DEFINITIONS --------

STYLES = {
    "digital art": {
        "prompt": "digital art, highly detailed",
        "negative": "blurry, pixelated, low resolution"
    },
    "oil painting": {
        "prompt": "oil painting, classical art, brushstrokes, canvas texture",
        "negative": "digital, pixelated, modern"
    },
    "watercolor": {
        "prompt": "watercolor painting, soft colors, flowing paint",
        "negative": "harsh lines, digital, photographic"
    },
    "cyberpunk": {
        "prompt": "cyberpunk style, neon lights, futuristic, sci-fi",
        "negative": "natural, organic, pastoral"
    },
    "impressionist": {
        "prompt": "impressionist painting, soft brushstrokes, light and color",
        "negative": "sharp details, photorealistic, digital"
    },
    "concept art": {
        "prompt": "concept art, professional illustration, detailed",
        "negative": "amateur, sketch, unfinished"
    },
    "anime": {
        "prompt": "anime style, manga, Japanese animation",
        "negative": "realistic, western cartoon, photographic"
    },
    "photorealistic": {
        "prompt": "photorealistic, high detail, professional photography",
        "negative": "cartoon, painting, stylized"
    },
    "minimalist": {
        "prompt": "minimalist art, clean lines, simple composition",
        "negative": "cluttered, complex, detailed"
    },
    "surrealism": {
        "prompt": "surrealist art, dreamlike, impossible, Salvador Dali style",
        "negative": "realistic, logical, mundane"
    },
    "pixel art": {
        "prompt": "pixel art, 8-bit style, retro gaming",
        "negative": "smooth, high resolution, photorealistic"
    },
    "steampunk": {
        "prompt": "steampunk style, Victorian era, brass and copper, gears",
        "negative": "modern, digital, futuristic"
    },
    "3d render": {
        "prompt": "3D render, CGI, computer graphics, ray tracing",
        "negative": "2D, flat, hand-drawn"
    }
}

MOODS = {
    "dreamy": {
        "prompt": "dreamy atmosphere, soft lighting, ethereal",
        "negative": "harsh, stark, realistic"
    },
    "dark": {
        "prompt": "dark and moody, dramatic shadows, mysterious",
        "negative": "bright, cheerful, light"
    },
    "peaceful": {
        "prompt": "peaceful, serene, calm, tranquil",
        "negative": "chaotic, violent, disturbing"
    },
    "vibrant": {
        "prompt": "vibrant and energetic, bright colors, dynamic",
        "negative": "dull, muted, lifeless"
    },
    "melancholic": {
        "prompt": "melancholic, sad, nostalgic, wistful",
        "negative": "happy, joyful, upbeat"
    },
    "mysterious": {
        "prompt": "mysterious, enigmatic, hidden secrets",
        "negative": "obvious, clear, straightforward"
    },
    "whimsical": {
        "prompt": "whimsical, playful, fantastical, magical",
        "negative": "serious, realistic, mundane"
    },
    "dramatic": {
        "prompt": "dramatic lighting, high contrast, cinematic",
        "negative": "flat lighting, low contrast, amateur"
    },
    "retro": {
        "prompt": "retro style, vintage, nostalgic, classic",
        "negative": "modern, contemporary, futuristic"
    }
}

QUALITY_PRESETS = {
    "draft": {"steps": 20, "guidance": 6.0, "size": (512, 512)},
    "standard": {"steps": 30, "guidance": 7.5, "size": (512, 512)},
    "high": {"steps": 50, "guidance": 8.5, "size": (768, 768)}
}

# -------- 3. MODEL LOADING --------

def load_model():
    MODEL_NAME = "runwayml/stable-diffusion-v1-5"
    log(f"Loading model: {MODEL_NAME}")

    def dummy_safety_checker(images, **kwargs):
        return images, [False] * len(images)

    try:
        pipe = StableDiffusionPipeline.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
            safety_checker=dummy_safety_checker,
        )
        pipe = pipe.to(device)
        pipe.enable_attention_slicing()
        log("Model loaded successfully")
        return pipe
    except Exception as e:
        log(f"Error loading model: {e}")
        sys.exit(1)

# -------- 4. INTERACTIVE FUNCTIONS --------

def list_options(options_dict, title):
    print(f"\n{title}:")
    for idx, (key, value) in enumerate(options_dict.items(), 1):
        prompt_preview = value["prompt"][:50] + "..." if len(value["prompt"]) > 50 else value["prompt"]
        print(f" {idx:2d}. {key:15s} - {prompt_preview}")

def get_user_choice(options_dict, prompt_text, allow_custom=True):
    choice = input(f"{prompt_text}: ").strip()

    if choice.isdigit():
        idx = int(choice) - 1
        keys = list(options_dict.keys())
        if 0 <= idx < len(keys):
            return keys[idx]

    if choice in options_dict:
        return choice

    if allow_custom and choice:
        return choice

    return None

def interactive_mode(pipe):
    log("Starting interactive style conditioning mode")

    # Get main prompt
    main_prompt = input("\nEnter your main scene/subject: ").strip()
    if not main_prompt:
        log("No prompt provided")
        return

    # Show and select style
    list_options(STYLES, "Available Styles")
    style_key = get_user_choice(STYLES, "Choose style (number/name/custom)")

    # Show and select mood
    list_options(MOODS, "Available Moods")
    mood_key = get_user_choice(MOODS, "Choose mood (number/name/custom/blank)", allow_custom=True)

    # Get additional parameters
    variations = input("Number of variations (default 1): ").strip()
    variations = int(variations) if variations.isdigit() else 1

    quality = input("Quality [draft/standard/high] (default standard): ").strip()
    quality = quality if quality in QUALITY_PRESETS else "standard"

    negative = input("Negative prompt (optional): ").strip()

    # Generate images
    generate_styled_images(pipe, main_prompt, style_key, mood_key, variations, quality, negative)

# -------- 5. GENERATION FUNCTION --------

def generate_styled_images(pipe, main_prompt, style_key, mood_key, variations, quality, custom_negative=""):
    # Build the full prompt
    full_prompt = main_prompt
    style_negative = ""
    mood_negative = ""

    if style_key and style_key in STYLES:
        full_prompt += f", {STYLES[style_key]['prompt']}"
        style_negative = STYLES[style_key]['negative']
    elif style_key:
        full_prompt += f", {style_key}"

    if mood_key and mood_key in MOODS:
        full_prompt += f", {MOODS[mood_key]['prompt']}"
        mood_negative = MOODS[mood_key]['negative']
    elif mood_key:
        full_prompt += f", {mood_key}"

    # Build negative prompt
    negative_parts = [part for part in [style_negative, mood_negative, custom_negative] if part]
    full_negative = ", ".join(negative_parts) if negative_parts else None

    # Get quality settings
    quality_settings = QUALITY_PRESETS[quality]

    log(f"Full prompt: {full_prompt}")
    log(f"Negative prompt: {full_negative or '[none]'}")
    log(f"Quality: {quality} ({quality_settings['steps']} steps)")
    log(f"Generating {variations} variation(s)")

    # Generate images
    for i in range(variations):
        seed = torch.seed()
        generator = torch.manual_seed(seed) if device == "cpu" else torch.Generator(device).manual_seed(seed)

        with torch.autocast(device) if device == "cuda" else torch.no_grad():
            result = pipe(
                full_prompt,
                negative_prompt=full_negative,
                height=quality_settings["size"][1],
                width=quality_settings["size"][0],
                num_inference_steps=quality_settings["steps"],
                guidance_scale=quality_settings["guidance"],
                generator=generator,
            )

        img = result.images[0]

        # Create filename
        prompt_slug = "_".join(main_prompt.lower().split()[:4])
        style_slug = (style_key or "nostyle").replace(" ", "")[:10]
        mood_slug = (mood_key or "nomood").replace(" ", "")[:10]
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

        filename = f"{prompt_slug[:20]}_{style_slug}_{mood_slug}_{quality}_{timestamp}_seed{seed}_v{i+1}.png"
        filepath = os.path.join(OUTPUT_DIR, filename)

        img.save(filepath)

        # Save metadata
        metadata = {
            "main_prompt": main_prompt,
            "style": style_key,
            "mood": mood_key,
            "full_prompt": full_prompt,
            "negative_prompt": full_negative,
            "quality": quality,
            "seed": seed,
            "variation": i + 1,
            "timestamp": datetime.now().isoformat(),
            "settings": quality_settings
        }

        metadata_file = filepath.replace('.png', '_metadata.json')
        with open(metadata_file, 'w') as f:
            json.dump(metadata, f, indent=2)

        log(f"Generated variation {i+1}: {filepath}")

    log(f"Phase 1.B complete - {variations} styled images generated")

# -------- 6. MAIN FUNCTION --------

def main():
    args = setup_args()

    # Handle list commands
    if args.list_styles:
        list_options(STYLES, "Available Styles")
        return

    if args.list_moods:
        list_options(MOODS, "Available Moods")
        return

    # Load model
    pipe = load_model()

    # Interactive mode
    if args.interactive:
        interactive_mode(pipe)
        return

    # Command line mode
    main_prompt = " ".join(args.prompt) if args.prompt else input("Enter main prompt: ").strip()
    if not main_prompt:
        log("No prompt provided")
        return

    generate_styled_images(
        pipe, main_prompt, args.style, args.mood,
        args.variations, args.quality, args.negative or ""
    )

if __name__ == "__main__":
    main()
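A small worked example of how the style/mood tables compose into the final prompts (values taken from the STYLES and MOODS dictionaries above):

# Sketch: prompt assembly mirroring generate_styled_images.
main_prompt = "a forest of bioluminescent trees"
style, mood = STYLES["cyberpunk"], MOODS["dark"]

full_prompt = f"{main_prompt}, {style['prompt']}, {mood['prompt']}"
full_negative = ", ".join([style["negative"], mood["negative"]])
# full_prompt  -> "a forest of bioluminescent trees, cyberpunk style, neon lights,
#                  futuristic, sci-fi, dark and moody, dramatic shadows, mysterious"
# full_negative -> "natural, organic, pastoral, bright, cheerful, light"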
src/generators/compi_phase1b_styled_generation.py
ADDED
@@ -0,0 +1,172 @@
# compi_phase1b_styled_generation.py

import os
import sys
import torch
from datetime import datetime
from diffusers import StableDiffusionPipeline
from PIL import Image

# Add project root to path for imports
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))

# -------- 1. SETUP --------
if torch.cuda.is_available():
    device = "cuda"
    print("Running on CUDA GPU.")
else:
    device = "cpu"
    print("Running on CPU.")

OUTPUT_DIR = os.path.join(os.path.dirname(__file__), '..', '..', "outputs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

def log(msg):
    now = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
    print(f"{now} {msg}")

# -------- 2. LOAD MODEL --------
MODEL_NAME = "runwayml/stable-diffusion-v1-5"
log(f"Loading model: {MODEL_NAME}")

def dummy_safety_checker(images, **kwargs):
    return images, [False] * len(images)

try:
    pipe = StableDiffusionPipeline.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        safety_checker=dummy_safety_checker,
    )
except Exception as e:
    log(f"Error loading model: {e}")
    sys.exit(1)

pipe = pipe.to(device)
pipe.enable_attention_slicing()

log("Model loaded.")

# -------- 3. STYLE & MOOD PROMPT ENGINEERING --------

# Predefined styles and moods (add more as desired)
STYLES = [
    "digital art",
    "oil painting",
    "watercolor",
    "cyberpunk",
    "impressionist",
    "concept art",
    "anime",
    "photorealistic",
    "minimalist",
    "surrealism",
    "pixel art",
    "steampunk",
    "3d render"
]

MOODS = [
    "dreamy atmosphere",
    "dark and moody",
    "peaceful",
    "vibrant and energetic",
    "melancholic",
    "mysterious",
    "whimsical",
    "serene",
    "uplifting",
    "dramatic lighting",
    "retro"
]

def main():
    """Main function for command-line execution"""
    # Input: main prompt
    if len(sys.argv) > 1:
        main_prompt = " ".join(sys.argv[1:])
        log(f"Prompt from command line: {main_prompt}")
    else:
        main_prompt = input("Enter your main scene/subject (e.g., 'A forest of bioluminescent trees'): ").strip()

    if not main_prompt:
        log("No main prompt entered. Exiting.")
        sys.exit(0)

    # Style selector
    print("\nChoose an art style from the list or enter your own:")
    for idx, style in enumerate(STYLES, 1):
        print(f" {idx}. {style}")
    style_choice = input(f"Enter style number [1-{len(STYLES)}] or type your own: ").strip()
    if style_choice.isdigit() and 1 <= int(style_choice) <= len(STYLES):
        style = STYLES[int(style_choice)-1]
    else:
        style = style_choice if style_choice else STYLES[0]
    log(f"Style selected: {style}")

    # Mood selector
    print("\nChoose a mood from the list or enter your own:")
    for idx, mood in enumerate(MOODS, 1):
        print(f" {idx}. {mood}")
    mood_choice = input(f"Enter mood number [1-{len(MOODS)}] or type your own (or leave blank): ").strip()
    if mood_choice.isdigit() and 1 <= int(mood_choice) <= len(MOODS):
        mood = MOODS[int(mood_choice)-1]
    elif mood_choice:
        mood = mood_choice
    else:
        mood = ""
    log(f"Mood selected: {mood if mood else '[none]'}")

    # Combine all for final prompt
    full_prompt = main_prompt
    if style:
        full_prompt += f", {style}"
    if mood:
        full_prompt += f", {mood}"
    log(f"Full prompt: {full_prompt}")

    # -------- 4. GENERATION PARAMETERS --------

    NUM_VARIATIONS = input("How many variations to generate? (default 1): ").strip()
    try:
        NUM_VARIATIONS = max(1, int(NUM_VARIATIONS))
    except Exception:
        NUM_VARIATIONS = 1

    INFERENCE_STEPS = 30
    GUIDANCE_SCALE = 7.5
    HEIGHT = 512
    WIDTH = 512

    # -------- 5. IMAGE GENERATION --------

    log(f"Generating {NUM_VARIATIONS} image(s) for prompt: '{full_prompt}'")
    images = []

    for i in range(NUM_VARIATIONS):
        seed = torch.seed()  # random seed for each variation
        generator = torch.manual_seed(seed) if device == "cpu" else torch.Generator(device).manual_seed(seed)
        with torch.autocast(device) if device == "cuda" else torch.no_grad():
            result = pipe(
                full_prompt,
                height=HEIGHT,
                width=WIDTH,
                num_inference_steps=INFERENCE_STEPS,
                guidance_scale=GUIDANCE_SCALE,
                generator=generator,
            )
        img: Image.Image = result.images[0]
        # Compose filename
        prompt_slug = "_".join(main_prompt.lower().split()[:5])
        style_slug = style.replace(" ", "")[:10]
        mood_slug = mood.replace(" ", "")[:10] if mood else "none"
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        fname = f"{prompt_slug[:25]}_{style_slug}_{mood_slug}_{timestamp}_seed{seed}_v{i+1}.png"
        fpath = os.path.join(OUTPUT_DIR, fname)
        img.save(fpath)
        log(f"Image saved: {fpath}")
        images.append(fpath)

    log(f"All {NUM_VARIATIONS} images generated and saved.")
    log("Phase 1.B complete.")

if __name__ == "__main__":
    main()
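In this simpler variant the styles and moods are plain strings, so composition is bare concatenation with no negative prompts; a one-line sketch using entries from the lists above:

# Sketch: the simple prompt composition used in main().
main_prompt, style, mood = "a misty harbor at dawn", STYLES[3], MOODS[1]
full_prompt = f"{main_prompt}, {style}, {mood}"
# -> "a misty harbor at dawn, cyberpunk, dark and moody"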
src/generators/compi_phase1d_cli_evaluation.py
ADDED
@@ -0,0 +1,341 @@
#!/usr/bin/env python3
"""
CompI Phase 1.D: Command-Line Quality Evaluation Tool

Command-line interface for batch evaluation and analysis of generated images.

Usage:
    python src/generators/compi_phase1d_cli_evaluation.py --help
    python src/generators/compi_phase1d_cli_evaluation.py --analyze
    python src/generators/compi_phase1d_cli_evaluation.py --batch-score 4 3 4 4 3
"""

import os
import argparse
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List

import pandas as pd
from PIL import Image

# Import functions from the main evaluation module
from compi_phase1d_evaluate_quality import (
    parse_filename, get_image_metrics, load_existing_evaluations,
    save_evaluation, EVALUATION_CRITERIA, OUTPUT_DIR, EVAL_CSV
)

def setup_args():
    """Setup command line arguments."""
    parser = argparse.ArgumentParser(
        description="CompI Phase 1.D: Command-Line Quality Evaluation",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Analyze existing evaluations
  python %(prog)s --analyze

  # Batch score all unevaluated images (prompt_match, style, mood, quality, appeal)
  python %(prog)s --batch-score 4 3 4 4 3 --notes "Batch evaluation - good quality"

  # Generate detailed report
  python %(prog)s --report --output evaluation_report.txt

  # List unevaluated images
  python %(prog)s --list-unevaluated
        """
    )

    parser.add_argument("--output-dir", default=OUTPUT_DIR,
                        help="Directory containing generated images")

    # Analysis commands
    parser.add_argument("--analyze", action="store_true",
                        help="Display evaluation summary and statistics")

    parser.add_argument("--report", action="store_true",
                        help="Generate detailed evaluation report")

    parser.add_argument("--output", "-o",
                        help="Output file for report (default: stdout)")

    # Batch evaluation
    parser.add_argument("--batch-score", nargs=5, type=int, metavar=("PROMPT", "STYLE", "MOOD", "QUALITY", "APPEAL"),
                        help="Batch score all unevaluated images (1-5 for each criteria)")

    parser.add_argument("--notes", default="CLI batch evaluation",
                        help="Notes for batch evaluation")

    # Listing commands
    parser.add_argument("--list-all", action="store_true",
                        help="List all images with evaluation status")

    parser.add_argument("--list-evaluated", action="store_true",
                        help="List only evaluated images")

    parser.add_argument("--list-unevaluated", action="store_true",
                        help="List only unevaluated images")

    # Filtering
    parser.add_argument("--style", help="Filter by style")
    parser.add_argument("--mood", help="Filter by mood")

    return parser.parse_args()

def load_images(output_dir: str) -> List[Dict]:
    """Load and parse all images from output directory."""
    if not os.path.exists(output_dir):
        print(f"❌ Output directory '{output_dir}' not found!")
        return []

    image_files = [f for f in os.listdir(output_dir) if f.lower().endswith('.png')]
    parsed_images = []

    for fname in image_files:
        metadata = parse_filename(fname)
        if metadata:
            parsed_images.append(metadata)

    return parsed_images

def filter_images(images: List[Dict], style: str = None, mood: str = None) -> List[Dict]:
    """Filter images by style and/or mood."""
    filtered = images

    if style:
        filtered = [img for img in filtered if img.get('style', '').lower() == style.lower()]

    if mood:
        filtered = [img for img in filtered if img.get('mood', '').lower() == mood.lower()]

    return filtered

def analyze_evaluations(existing_evals: Dict):
    """Display evaluation analysis."""
    if not existing_evals:
        print("❌ No evaluations found.")
        return

    df = pd.DataFrame.from_dict(existing_evals, orient='index')

    print("📊 CompI Phase 1.D - Evaluation Analysis")
    print("=" * 50)
    print(f"Total Evaluated Images: {len(df)}")
    print()

    # Score statistics
    print("📈 Score Statistics:")
    for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
        if criterion_key in df.columns:
            mean_score = df[criterion_key].mean()
            std_score = df[criterion_key].std()
            min_score = df[criterion_key].min()
            max_score = df[criterion_key].max()

            print(f" {criterion_info['name']:20}: {mean_score:.2f} ± {std_score:.2f} (range: {min_score}-{max_score})")

    print()

    # Style analysis
    if 'style' in df.columns and 'prompt_match' in df.columns:
        print("🎨 Top Performing Styles (by Prompt Match):")
        style_scores = df.groupby('style')['prompt_match'].mean().sort_values(ascending=False)
        for style, score in style_scores.head(5).items():
            print(f" {style:15}: {score:.2f}")
        print()

    # Mood analysis
    if 'mood' in df.columns and 'creative_appeal' in df.columns:
        print("🌟 Top Performing Moods (by Creative Appeal):")
        mood_scores = df.groupby('mood')['creative_appeal'].mean().sort_values(ascending=False)
        for mood, score in mood_scores.head(5).items():
            print(f" {mood:15}: {score:.2f}")
        print()

def generate_detailed_report(existing_evals: Dict) -> str:
    """Generate detailed evaluation report."""
    if not existing_evals:
        return "No evaluations found."

    df = pd.DataFrame.from_dict(existing_evals, orient='index')

    report_lines = [
        "# CompI Phase 1.D - Detailed Evaluation Report",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"Total Images Evaluated: {len(df)}",
        "",
        "## Overall Performance Summary"
    ]

    # Overall statistics
    for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
        if criterion_key in df.columns:
            mean_score = df[criterion_key].mean()
            std_score = df[criterion_key].std()
            report_lines.append(f"- **{criterion_info['name']}**: {mean_score:.2f} ± {std_score:.2f}")

    # Distribution analysis
    report_lines.extend([
        "",
        "## Score Distribution Analysis"
    ])

    for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
        if criterion_key in df.columns:
            scores = df[criterion_key]
            report_lines.extend([
                "",
                f"### {criterion_info['name']}",
                f"- Mean: {scores.mean():.2f}",
                f"- Median: {scores.median():.2f}",
                f"- Mode: {scores.mode().iloc[0] if not scores.mode().empty else 'N/A'}",
                f"- Range: {scores.min()}-{scores.max()}",
                "- Distribution: " + " | ".join([f"{i}★: {(scores == i).sum()}" for i in range(1, 6)])
            ])

    # Style/Mood performance
    if 'style' in df.columns:
        report_lines.extend([
            "",
            "## Style Performance Analysis"
        ])

        for criterion_key in EVALUATION_CRITERIA.keys():
            if criterion_key in df.columns:
                style_performance = df.groupby('style')[criterion_key].agg(['mean', 'count']).sort_values('mean', ascending=False)
                report_lines.extend([
                    "",
                    f"### {EVALUATION_CRITERIA[criterion_key]['name']} by Style",
                ])

                for style, (mean_score, count) in style_performance.iterrows():
                    report_lines.append(f"- {style}: {mean_score:.2f} (n={count})")

    # Recommendations
    report_lines.extend([
        "",
        "## Recommendations",
        "",
        "### Areas for Improvement"
    ])

    # Find lowest scoring criteria
    criterion_means = {}
    for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
        if criterion_key in df.columns:
            criterion_means[criterion_info['name']] = df[criterion_key].mean()

    if criterion_means:
        lowest_criteria = sorted(criterion_means.items(), key=lambda x: x[1])[:2]
        for criterion_name, score in lowest_criteria:
            report_lines.append(f"- Focus on improving **{criterion_name}** (current: {score:.2f}/5)")

    report_lines.extend([
        "",
        "### Best Practices",
        "- Continue systematic evaluation for trend analysis",
        "- Experiment with parameter adjustments for low-scoring areas",
        "- Consider A/B testing different generation approaches",
        "- Document successful style/mood combinations for reuse"
    ])

    return "\n".join(report_lines)

def batch_evaluate_images(images: List[Dict], scores: List[int], notes: str, output_dir: str):
    """Batch evaluate unevaluated images."""
    existing_evals = load_existing_evaluations()
    unevaluated = [img for img in images if img['filename'] not in existing_evals]

    if not unevaluated:
        print("✅ All images are already evaluated!")
        return

    print(f"📦 Batch evaluating {len(unevaluated)} images...")

    # Map scores to criteria
    criteria_keys = list(EVALUATION_CRITERIA.keys())
    score_dict = dict(zip(criteria_keys, scores))

    for i, img_data in enumerate(unevaluated):
        fname = img_data["filename"]
        img_path = os.path.join(output_dir, fname)

        try:
            metrics = get_image_metrics(img_path)
            save_evaluation(fname, img_data, score_dict, notes, metrics)
            print(f" ✅ Evaluated: {fname}")
        except Exception as e:
            print(f" ❌ Error evaluating {fname}: {e}")

    print("🎉 Batch evaluation completed!")

def list_images(images: List[Dict], existing_evals: Dict, show_evaluated: bool = True, show_unevaluated: bool = True):
    """List images with evaluation status."""
    print(f"📋 Image List ({len(images)} total)")
    print("-" * 80)

    for img_data in images:
        fname = img_data["filename"]
        is_evaluated = fname in existing_evals

        if (show_evaluated and is_evaluated) or (show_unevaluated and not is_evaluated):
            status = "✅" if is_evaluated else "❌"
            prompt = img_data.get('prompt', 'unknown')[:30]
            style = img_data.get('style', 'unknown')[:15]
            mood = img_data.get('mood', 'unknown')[:15]

            print(f"{status} {fname}")
            print(f"    Prompt: {prompt}... | Style: {style} | Mood: {mood}")

            if is_evaluated:
                eval_data = existing_evals[fname]
                scores = [f"{k}:{eval_data.get(k, 'N/A')}" for k in EVALUATION_CRITERIA.keys() if k in eval_data]
                print(f"    Scores: {' | '.join(scores[:3])}...")
            print()

def main():
    """Main CLI function."""
    args = setup_args()

    # Load images
    images = load_images(args.output_dir)
    if not images:
        return

    # Apply filters
    images = filter_images(images, args.style, args.mood)

    # Load existing evaluations
    existing_evals = load_existing_evaluations()

    # Execute commands
    if args.analyze:
        analyze_evaluations(existing_evals)

    elif args.report:
        report = generate_detailed_report(existing_evals)
        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(report)
            print(f"📄 Report saved to: {args.output}")
        else:
            print(report)

    elif args.batch_score:
        batch_evaluate_images(images, args.batch_score, args.notes, args.output_dir)

    elif args.list_all:
        list_images(images, existing_evals, True, True)

    elif args.list_evaluated:
        list_images(images, existing_evals, True, False)

    elif args.list_unevaluated:
        list_images(images, existing_evals, False, True)

    else:
        print("❓ No command specified. Use --help for usage information.")

if __name__ == "__main__":
    main()
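The five positional --batch-score values are paired with the criteria in dictionary order; a small sketch of that mapping (the key names here are hypothetical stand-ins inferred from the metavar hints, since EVALUATION_CRITERIA itself is defined in the evaluation module):

# Sketch: how batch_evaluate_images builds its score dictionary.
criteria_keys = ["prompt_match", "style", "mood", "quality", "creative_appeal"]  # illustrative keys
score_dict = dict(zip(criteria_keys, [4, 3, 4, 4, 3]))
# -> {"prompt_match": 4, "style": 3, "mood": 4, "quality": 4, "creative_appeal": 3}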
src/generators/compi_phase1d_evaluate_quality.py
ADDED
|
@@ -0,0 +1,496 @@
#!/usr/bin/env python3
"""
CompI Phase 1.D: Baseline Output Quality Evaluation Tool

This tool provides systematic evaluation of generated images with:
- Visual quality assessment
- Prompt adherence scoring
- Style/mood consistency evaluation
- Objective metrics calculation
- Comprehensive logging and tracking

Usage:
    python src/generators/compi_phase1d_evaluate_quality.py
    # Or via wrapper: python run_evaluation.py
"""

import os
import re
import csv
import json
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import argparse

import streamlit as st
from PIL import Image
import imagehash
import pandas as pd

# -------- 1. CONFIGURATION --------

OUTPUT_DIR = "outputs"
EVAL_CSV = "outputs/evaluation_log.csv"
EVAL_SUMMARY = "outputs/evaluation_summary.json"

# Filename patterns for different CompI phases
FILENAME_PATTERNS = [
    # Phase 1.B Advanced styling: prompt_style_mood_timestamp_seed_variation
    re.compile(r"^(?P<prompt>[a-z0-9_,]+)_(?P<style>[a-zA-Z0-9]+)_(?P<mood>[a-zA-Z0-9]+)_(?P<timestamp>\d{8}_\d{6})_seed(?P<seed>\d+)_v(?P<variation>\d+)\.png$"),
    # Phase 1.A Basic generation: prompt_timestamp_seed
    re.compile(r"^(?P<prompt>[a-z0-9_,]+)_(?P<timestamp>\d{8}_\d{6})_seed(?P<seed>\d+)\.png$"),
    # Alternative pattern: prompt_style_timestamp_seed
    re.compile(r"^(?P<prompt>[a-z0-9_,]+)_(?P<style>[a-zA-Z0-9]+)_(?P<timestamp>\d{8}_\d{6})_seed(?P<seed>\d+)\.png$"),
]

# Evaluation criteria
EVALUATION_CRITERIA = {
    "prompt_match": {
        "name": "Prompt Adherence",
        "description": "How well does the image match the text prompt?",
        "scale": "1=Poor match, 3=Good match, 5=Perfect match"
    },
    "style_consistency": {
        "name": "Style Consistency",
        "description": "How well does the image reflect the intended artistic style?",
        "scale": "1=Style not evident, 3=Style present, 5=Style perfectly executed"
    },
    "mood_atmosphere": {
        "name": "Mood & Atmosphere",
        "description": "How well does the image convey the intended mood/atmosphere?",
        "scale": "1=Wrong mood, 3=Neutral/adequate, 5=Perfect mood"
    },
    "technical_quality": {
        "name": "Technical Quality",
        "description": "Overall image quality (resolution, composition, artifacts)",
        "scale": "1=Poor quality, 3=Acceptable, 5=Excellent quality"
    },
    "creative_appeal": {
        "name": "Creative Appeal",
        "description": "Subjective aesthetic and creative value",
        "scale": "1=Unappealing, 3=Decent, 5=Highly appealing"
    }
}

# -------- 2. UTILITY FUNCTIONS --------

def parse_filename(filename: str) -> Optional[Dict]:
    """Parse filename to extract metadata using multiple patterns."""
    for pattern in FILENAME_PATTERNS:
        match = pattern.match(filename)
        if match:
            data = match.groupdict()
            data["filename"] = filename
            # Set defaults for missing fields
            data.setdefault("style", "unknown")
            data.setdefault("mood", "unknown")
            data.setdefault("variation", "1")
            return data
    return None

def get_image_metrics(image_path: str) -> Dict:
    """Calculate objective image metrics."""
    try:
        img = Image.open(image_path)
        file_size = os.path.getsize(image_path)

        # Perceptual hashes for similarity detection
        phash = str(imagehash.phash(img))
        dhash = str(imagehash.dhash(img))

        # Basic image stats
        width, height = img.size
        aspect_ratio = width / height

        # Color analysis
        if img.mode == 'RGB':
            colors = img.getcolors(maxcolors=256*256*256)
            unique_colors = len(colors) if colors else 0
        else:
            unique_colors = 0

        return {
            "width": width,
            "height": height,
            "aspect_ratio": round(aspect_ratio, 3),
            "file_size_kb": round(file_size / 1024, 2),
            "unique_colors": unique_colors,
            "phash": phash,
            "dhash": dhash,
            "format": img.format,
            "mode": img.mode
        }
    except Exception as e:
        return {"error": str(e)}

def load_existing_evaluations() -> Dict:
    """Load existing evaluations from CSV."""
    if not os.path.exists(EVAL_CSV):
        return {}

    try:
        df = pd.read_csv(EVAL_CSV)
        return df.set_index('filename').to_dict('index')
    except Exception:
        return {}

def save_evaluation(filename: str, metadata: Dict, scores: Dict, notes: str, metrics: Dict):
    """Save evaluation to CSV file."""
    # Prepare row data
    row_data = {
        "filename": filename,
        "timestamp": datetime.now().isoformat(),
        "prompt": metadata.get("prompt", ""),
        "style": metadata.get("style", ""),
        "mood": metadata.get("mood", ""),
        "seed": metadata.get("seed", ""),
        "variation": metadata.get("variation", ""),
        "generation_timestamp": metadata.get("timestamp", ""),
        "notes": notes,
        **scores,  # Add all evaluation scores
        **{f"metric_{k}": v for k, v in metrics.items() if k != "error"}  # Add metrics with prefix
    }

    # Create CSV if it doesn't exist
    file_exists = os.path.exists(EVAL_CSV)

    with open(EVAL_CSV, "a", newline='', encoding='utf-8') as f:
        fieldnames = list(row_data.keys())
        writer = csv.DictWriter(f, fieldnames=fieldnames)

        if not file_exists:
            writer.writeheader()
        writer.writerow(row_data)

# -------- 3. STREAMLIT UI --------

def main():
    st.set_page_config(
        page_title="CompI - Quality Evaluation",
        layout="wide",
        initial_sidebar_state="expanded"
    )

    st.title("🕵️ CompI Phase 1.D: Baseline Output Quality Evaluation")

    st.markdown("""
    **Systematic evaluation tool for CompI-generated images**

    This tool helps you:
    - 📊 Assess image quality across multiple criteria
    - 📈 Track improvements over time
    - 🔍 Calculate objective metrics
    - 📝 Maintain detailed evaluation logs
    """)

    # Sidebar configuration
    with st.sidebar:
        st.header("⚙️ Configuration")

        # Output directory selection
        output_dir = st.text_input("Output Directory", OUTPUT_DIR)

        # Evaluation mode
        eval_mode = st.selectbox(
            "Evaluation Mode",
            ["Single Image Review", "Batch Evaluation", "Summary Analysis"]
        )

        # Filter options
        st.subheader("🔍 Filters")
        show_evaluated = st.checkbox("Show already evaluated", True)
        show_unevaluated = st.checkbox("Show unevaluated", True)

    # Load images
    if not os.path.exists(output_dir):
        st.error(f"Output directory '{output_dir}' not found!")
        return

    image_files = [f for f in os.listdir(output_dir) if f.lower().endswith('.png')]
    parsed_images = []

    for fname in image_files:
        metadata = parse_filename(fname)
        if metadata:
            parsed_images.append(metadata)

    if not parsed_images:
        st.warning("No CompI-generated images found with recognizable filename patterns.")
        st.info("Expected patterns: prompt_style_mood_timestamp_seed_variation.png")
        return

    # Load existing evaluations
    existing_evals = load_existing_evaluations()

    # Filter images based on evaluation status
    filtered_images = []
    for img_data in parsed_images:
        fname = img_data["filename"]
        is_evaluated = fname in existing_evals

        if (show_evaluated and is_evaluated) or (show_unevaluated and not is_evaluated):
            img_data["is_evaluated"] = is_evaluated
            filtered_images.append(img_data)

    st.info(f"Found {len(filtered_images)} images matching your filters")

    # Main evaluation interface
    if eval_mode == "Single Image Review":
        single_image_evaluation(filtered_images, existing_evals, output_dir)
    elif eval_mode == "Batch Evaluation":
        batch_evaluation(filtered_images, existing_evals, output_dir)
    else:
        summary_analysis(existing_evals)

def single_image_evaluation(images: List[Dict], existing_evals: Dict, output_dir: str):
    """Single image evaluation interface."""
    if not images:
        st.warning("No images available for evaluation.")
        return

    # Image selection
    image_options = [f"{img['filename']} {'✅' if img['is_evaluated'] else '❌'}" for img in images]
    selected_idx = st.selectbox("Select Image to Evaluate", range(len(image_options)), format_func=lambda x: image_options[x])

    if selected_idx is None:
        return

    img_data = images[selected_idx]
    fname = img_data["filename"]
    img_path = os.path.join(output_dir, fname)

    # Display image and metadata
    col1, col2 = st.columns([1, 1])

    with col1:
        st.subheader("🖼️ Image")
        try:
            image = Image.open(img_path)
            st.image(image, use_container_width=True)

            # Calculate metrics
            metrics = get_image_metrics(img_path)
            if "error" not in metrics:
                st.subheader("📊 Objective Metrics")
                st.json(metrics)
        except Exception as e:
            st.error(f"Error loading image: {e}")
            return

    with col2:
        st.subheader("📋 Metadata")
        st.json({k: v for k, v in img_data.items() if k != "filename"})

    # Evaluation form
    st.subheader("⭐ Evaluation")

    # Load existing scores if available
    existing = existing_evals.get(fname, {})

    with st.form(f"eval_form_{fname}"):
        scores = {}
        for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
            scores[criterion_key] = st.slider(
                f"{criterion_info['name']}",
                min_value=1, max_value=5,
                value=int(existing.get(criterion_key, 3)),
                help=f"{criterion_info['description']}\n{criterion_info['scale']}"
            )

        notes = st.text_area(
            "Notes & Comments",
            value=existing.get("notes", ""),
            help="Additional observations, issues, or suggestions"
        )

        submitted = st.form_submit_button("💾 Save Evaluation")

        if submitted:
            save_evaluation(fname, img_data, scores, notes, metrics)
            st.success(f"✅ Evaluation saved for {fname}")
            st.experimental_rerun()

def batch_evaluation(images: List[Dict], existing_evals: Dict, output_dir: str):
    """Batch evaluation interface for multiple images."""
    st.subheader("📦 Batch Evaluation")

    unevaluated = [img for img in images if not img['is_evaluated']]

    if not unevaluated:
        st.info("All images have been evaluated!")
        return

    st.info(f"{len(unevaluated)} images pending evaluation")

    # Quick batch scoring
    with st.form("batch_eval_form"):
        st.write("**Quick Batch Scoring** (applies to all unevaluated images)")

        batch_scores = {}
        for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
            batch_scores[criterion_key] = st.slider(
                f"Default {criterion_info['name']}",
                min_value=1, max_value=5, value=3,
                help=f"Default score for {criterion_info['description']}"
            )

        batch_notes = st.text_area("Default Notes", "Batch evaluation")

        if st.form_submit_button("Apply to All Unevaluated"):
            progress_bar = st.progress(0)

            for i, img_data in enumerate(unevaluated):
                fname = img_data["filename"]
                img_path = os.path.join(output_dir, fname)
                metrics = get_image_metrics(img_path)

                save_evaluation(fname, img_data, batch_scores, batch_notes, metrics)
                progress_bar.progress((i + 1) / len(unevaluated))

            st.success(f"✅ Batch evaluation completed for {len(unevaluated)} images!")
            st.experimental_rerun()

def summary_analysis(existing_evals: Dict):
    """Display evaluation summary and analytics."""
    st.subheader("📈 Evaluation Summary & Analytics")

    if not existing_evals:
        st.warning("No evaluations found. Please evaluate some images first.")
        return

    # Convert to DataFrame for analysis
    df = pd.DataFrame.from_dict(existing_evals, orient='index')

    # Basic statistics
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("Total Evaluated", len(df))

    with col2:
        if 'prompt_match' in df.columns:
            avg_prompt_match = df['prompt_match'].mean()
            st.metric("Avg Prompt Match", f"{avg_prompt_match:.2f}/5")

    with col3:
        if 'technical_quality' in df.columns:
            avg_quality = df['technical_quality'].mean()
            st.metric("Avg Technical Quality", f"{avg_quality:.2f}/5")

    # Detailed analytics
    st.subheader("📊 Detailed Analytics")

    # Score distribution
    if any(col in df.columns for col in EVALUATION_CRITERIA.keys()):
        st.write("**Score Distribution by Criteria**")

        score_cols = [col for col in EVALUATION_CRITERIA.keys() if col in df.columns]
        if score_cols:
            score_data = df[score_cols].mean().sort_values(ascending=False)
            st.bar_chart(score_data)

    # Style/Mood analysis
    if 'style' in df.columns and 'mood' in df.columns:
        st.write("**Performance by Style & Mood**")

        col1, col2 = st.columns(2)

        with col1:
            if 'prompt_match' in df.columns:
                style_performance = df.groupby('style')['prompt_match'].mean().sort_values(ascending=False)
                st.write("**Best Performing Styles (Prompt Match)**")
                st.bar_chart(style_performance)

        with col2:
            if 'creative_appeal' in df.columns:
                mood_performance = df.groupby('mood')['creative_appeal'].mean().sort_values(ascending=False)
                st.write("**Best Performing Moods (Creative Appeal)**")
                st.bar_chart(mood_performance)

    # Recent evaluations
    st.subheader("🕒 Recent Evaluations")

    if 'timestamp' in df.columns:
        recent_df = df.sort_values('timestamp', ascending=False).head(10)
        display_cols = ['prompt', 'style', 'mood'] + [col for col in EVALUATION_CRITERIA.keys() if col in df.columns]
        display_cols = [col for col in display_cols if col in recent_df.columns]

        if display_cols:
            st.dataframe(recent_df[display_cols])

    # Export options
    st.subheader("💾 Export Data")

    col1, col2 = st.columns(2)

    with col1:
        if st.button("📊 Download CSV"):
            csv_data = df.to_csv()
            st.download_button(
                label="Download Evaluation Data",
                data=csv_data,
                file_name=f"compi_evaluation_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
                mime="text/csv"
            )

    with col2:
        if st.button("📋 Generate Report"):
            # Generate summary report
            report = generate_evaluation_report(df)
            st.text_area("Evaluation Report", report, height=300)

def generate_evaluation_report(df: pd.DataFrame) -> str:
    """Generate a text summary report of evaluations."""
    report_lines = [
        "# CompI Phase 1.D - Evaluation Report",
        f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        "",
        "## Summary Statistics",
        f"- Total Images Evaluated: {len(df)}",
    ]

    # Add score summaries
    for criterion_key, criterion_info in EVALUATION_CRITERIA.items():
        if criterion_key in df.columns:
            mean_score = df[criterion_key].mean()
            std_score = df[criterion_key].std()
            report_lines.append(f"- {criterion_info['name']}: {mean_score:.2f} ± {std_score:.2f}")

    # Add style/mood analysis
    if 'style' in df.columns:
        report_lines.extend([
            "",
            "## Style Performance",
        ])

        if 'prompt_match' in df.columns:
            style_scores = df.groupby('style')['prompt_match'].mean().sort_values(ascending=False)
            for style, score in style_scores.head(5).items():
                report_lines.append(f"- {style}: {score:.2f}")

    if 'mood' in df.columns:
        report_lines.extend([
            "",
            "## Mood Performance",
        ])

        if 'creative_appeal' in df.columns:
            mood_scores = df.groupby('mood')['creative_appeal'].mean().sort_values(ascending=False)
            for mood, score in mood_scores.head(5).items():
                report_lines.append(f"- {mood}: {score:.2f}")

    # Add recommendations
    report_lines.extend([
        "",
        "## Recommendations",
        "- Focus on improving lowest-scoring criteria",
        "- Experiment with best-performing style/mood combinations",
        "- Consider adjusting generation parameters for consistency",
        "- Continue systematic evaluation for trend analysis"
    ])

    return "\n".join(report_lines)

if __name__ == "__main__":
    main()
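The tool above stores perceptual hashes with every evaluation ("for similarity detection") but leaves the actual comparison to the reader. A minimal sketch of that follow-up step, assuming the metric_phash column written by save_evaluation() and an ad-hoc Hamming-distance threshold of 8 (the threshold is an assumption, not a project constant):

    # Flag visually near-duplicate generations from the evaluation log.
    import itertools

    import imagehash
    import pandas as pd

    df = pd.read_csv("outputs/evaluation_log.csv")

    # Rebuild hash objects from the hex strings stored by get_image_metrics()
    hashes = {
        row["filename"]: imagehash.hex_to_hash(row["metric_phash"])
        for _, row in df.iterrows()
        if isinstance(row.get("metric_phash"), str)
    }

    for (f1, h1), (f2, h2) in itertools.combinations(hashes.items(), 2):
        distance = h1 - h2  # Hamming distance between perceptual hashes
        if distance <= 8:   # small distance => visually similar images
            print(f"Possible near-duplicates (distance {distance}): {f1} <-> {f2}")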
src/generators/compi_phase1e_dataset_prep.py
ADDED
@@ -0,0 +1,329 @@
#!/usr/bin/env python3
"""
CompI Phase 1.E: Dataset Preparation for LoRA Fine-tuning

This tool helps prepare your personal style dataset for LoRA training:
- Organize and validate style images
- Generate appropriate captions
- Resize and format images for training
- Create training/validation splits

Usage:
    python src/generators/compi_phase1e_dataset_prep.py --help
    python src/generators/compi_phase1e_dataset_prep.py --input-dir my_style_images --style-name "my_art_style"
"""

import os
import argparse
import json
import shutil
from pathlib import Path
from typing import List, Dict, Tuple
import random

from PIL import Image, ImageOps
import pandas as pd

# -------- 1. CONFIGURATION --------

DEFAULT_IMAGE_SIZE = 512
SUPPORTED_FORMATS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
MIN_IMAGES_RECOMMENDED = 10
TRAIN_SPLIT_RATIO = 0.8

# -------- 2. UTILITY FUNCTIONS --------

def setup_args():
    """Setup command line arguments."""
    parser = argparse.ArgumentParser(
        description="CompI Phase 1.E: Dataset Preparation for LoRA Fine-tuning",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Prepare dataset from a folder of images
  python %(prog)s --input-dir my_artwork --style-name "impressionist_style"

  # Custom output directory and image size
  python %(prog)s --input-dir paintings --style-name "oil_painting" --output-dir datasets/oil_style --size 768

  # Generate captions with custom trigger word
  python %(prog)s --input-dir sketches --style-name "pencil_sketch" --trigger-word "sketch_style"
        """
    )

    parser.add_argument("--input-dir", required=True,
                        help="Directory containing your style images")

    parser.add_argument("--style-name", required=True,
                        help="Name for your style (used in file naming and captions)")

    parser.add_argument("--output-dir",
                        help="Output directory for prepared dataset (default: datasets/{style_name})")

    parser.add_argument("--trigger-word",
                        help="Trigger word for style (default: style_name)")

    parser.add_argument("--size", type=int, default=DEFAULT_IMAGE_SIZE,
                        help=f"Target image size in pixels (default: {DEFAULT_IMAGE_SIZE})")

    parser.add_argument("--caption-template",
                        default="a painting in {trigger_word} style",
                        help="Template for generating captions")

    parser.add_argument("--train-split", type=float, default=TRAIN_SPLIT_RATIO,
                        help=f"Ratio for train/validation split (default: {TRAIN_SPLIT_RATIO})")

    parser.add_argument("--copy-images", action="store_true",
                        help="Copy images instead of creating symlinks")

    parser.add_argument("--validate-only", action="store_true",
                        help="Only validate input directory without processing")

    return parser.parse_args()

def validate_image_directory(input_dir: str) -> Tuple[List[str], List[str]]:
    """Validate input directory and return valid/invalid image files."""
    if not os.path.exists(input_dir):
        raise FileNotFoundError(f"Input directory not found: {input_dir}")

    all_files = os.listdir(input_dir)
    valid_images = []
    invalid_files = []

    for filename in all_files:
        filepath = os.path.join(input_dir, filename)

        # Check if it's a file
        if not os.path.isfile(filepath):
            continue

        # Check extension
        ext = Path(filename).suffix.lower()
        if ext not in SUPPORTED_FORMATS:
            invalid_files.append(f"{filename} (unsupported format: {ext})")
            continue

        # Try to open image
        try:
            with Image.open(filepath) as img:
                # Basic validation
                if img.size[0] < 64 or img.size[1] < 64:
                    invalid_files.append(f"{filename} (too small: {img.size})")
                    continue

                valid_images.append(filename)
        except Exception as e:
            invalid_files.append(f"{filename} (corrupt: {str(e)})")

    return valid_images, invalid_files

def process_image(input_path: str, output_path: str, target_size: int) -> Dict:
    """Process a single image for training."""
    with Image.open(input_path) as img:
        # Convert to RGB if needed
        if img.mode != 'RGB':
            img = img.convert('RGB')

        # Get original dimensions
        original_size = img.size

        # Resize maintaining aspect ratio, then center crop
        img = ImageOps.fit(img, (target_size, target_size), Image.Resampling.LANCZOS)

        # Save processed image
        img.save(output_path, 'PNG', quality=95)

        return {
            'original_size': original_size,
            'processed_size': img.size,
            'format': 'PNG'
        }

def generate_captions(image_files: List[str], caption_template: str, trigger_word: str) -> Dict[str, str]:
    """Generate captions for training images."""
    captions = {}

    for filename in image_files:
        # Basic caption using template
        caption = caption_template.format(trigger_word=trigger_word)

        # You could add more sophisticated caption generation here
        # For example, using BLIP or other image captioning models

        captions[filename] = caption

    return captions

def create_dataset_structure(output_dir: str, style_name: str):
    """Create the dataset directory structure."""
    dataset_dir = Path(output_dir)

    # Create main directories
    dirs_to_create = [
        dataset_dir,
        dataset_dir / "images",
        dataset_dir / "train",
        dataset_dir / "validation"
    ]

    for dir_path in dirs_to_create:
        dir_path.mkdir(parents=True, exist_ok=True)

    return dataset_dir

def split_dataset(image_files: List[str], train_ratio: float) -> Tuple[List[str], List[str]]:
    """Split images into train and validation sets."""
    random.shuffle(image_files)

    train_count = int(len(image_files) * train_ratio)
    train_files = image_files[:train_count]
    val_files = image_files[train_count:]

    return train_files, val_files

def save_metadata(dataset_dir: Path, metadata: Dict):
    """Save dataset metadata."""
    metadata_file = dataset_dir / "dataset_info.json"

    with open(metadata_file, 'w') as f:
        json.dump(metadata, f, indent=2)

    print(f"📄 Dataset metadata saved to: {metadata_file}")

def create_captions_file(dataset_dir: Path, captions: Dict[str, str], split_name: str):
    """Create captions file for training."""
    captions_file = dataset_dir / f"{split_name}_captions.txt"

    with open(captions_file, 'w') as f:
        for filename, caption in captions.items():
            f.write(f"{filename}: {caption}\n")

    return captions_file

# -------- 3. MAIN PROCESSING FUNCTION --------

def prepare_dataset(args):
    """Main dataset preparation function."""
    print(f"🎨 CompI Phase 1.E: Preparing LoRA Dataset for '{args.style_name}'")
    print("=" * 60)

    # Setup paths
    input_dir = Path(args.input_dir)
    if args.output_dir:
        output_dir = Path(args.output_dir)
    else:
        output_dir = Path("datasets") / args.style_name

    trigger_word = args.trigger_word or args.style_name

    print(f"📁 Input directory: {input_dir}")
    print(f"📁 Output directory: {output_dir}")
    print(f"🎯 Style name: {args.style_name}")
    print(f"🔤 Trigger word: {trigger_word}")
    print(f"📐 Target size: {args.size}x{args.size}")

    # Validate input directory
    print(f"\n🔍 Validating input directory...")
    valid_images, invalid_files = validate_image_directory(str(input_dir))

    print(f"✅ Found {len(valid_images)} valid images")
    if invalid_files:
        print(f"⚠️  Found {len(invalid_files)} invalid files:")
        for invalid in invalid_files[:5]:  # Show first 5
            print(f"   - {invalid}")
        if len(invalid_files) > 5:
            print(f"   ... and {len(invalid_files) - 5} more")

    if len(valid_images) < MIN_IMAGES_RECOMMENDED:
        print(f"⚠️  Warning: Only {len(valid_images)} images found. Recommended minimum: {MIN_IMAGES_RECOMMENDED}")
        print("   Consider adding more images for better style learning.")

    if args.validate_only:
        print("✅ Validation complete (--validate-only specified)")
        return

    # Create dataset structure
    print(f"\n📁 Creating dataset structure...")
    dataset_dir = create_dataset_structure(str(output_dir), args.style_name)

    # Split dataset
    train_files, val_files = split_dataset(valid_images, args.train_split)
    print(f"📊 Dataset split: {len(train_files)} train, {len(val_files)} validation")

    # Generate captions
    print(f"\n📝 Generating captions...")
    all_captions = generate_captions(valid_images, args.caption_template, trigger_word)

    # Process images
    print(f"\n🖼️  Processing images...")
    processed_count = 0
    processing_stats = []

    for split_name, file_list in [("train", train_files), ("validation", val_files)]:
        if not file_list:
            continue

        split_dir = dataset_dir / split_name
        split_captions = {}

        for filename in file_list:
            input_path = input_dir / filename
            output_filename = f"{Path(filename).stem}.png"
            output_path = split_dir / output_filename

            try:
                stats = process_image(str(input_path), str(output_path), args.size)
                processing_stats.append(stats)
                split_captions[output_filename] = all_captions[filename]
                processed_count += 1

                if processed_count % 10 == 0:
                    print(f"   Processed {processed_count}/{len(valid_images)} images...")

            except Exception as e:
                print(f"❌ Error processing {filename}: {e}")

        # Create captions file for this split
        if split_captions:
            captions_file = create_captions_file(dataset_dir, split_captions, split_name)
            print(f"📝 Created {split_name} captions: {captions_file}")

    # Save metadata
    metadata = {
        'style_name': args.style_name,
        'trigger_word': trigger_word,
        'total_images': len(valid_images),
        'train_images': len(train_files),
        'validation_images': len(val_files),
        'image_size': args.size,
        'caption_template': args.caption_template,
        'created_at': pd.Timestamp.now().isoformat(),
        'processing_stats': {
            'processed_count': processed_count,
            'failed_count': len(valid_images) - processed_count
        }
    }

    save_metadata(dataset_dir, metadata)

    print(f"\n🎉 Dataset preparation complete!")
    print(f"📁 Dataset location: {dataset_dir}")
    print(f"📊 Ready for LoRA training with {processed_count} processed images")
    print(f"\n💡 Next steps:")
    print(f"   1. Review the generated dataset in: {dataset_dir}")
    print(f"   2. Run LoRA training: python src/generators/compi_phase1e_lora_training.py --dataset-dir {dataset_dir}")

def main():
    """Main function."""
    args = setup_args()

    try:
        prepare_dataset(args)
    except Exception as e:
        print(f"❌ Error: {e}")
        return 1

    return 0

if __name__ == "__main__":
    exit(main())
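generate_captions() above explicitly leaves richer captioning (e.g. BLIP) as an extension point. A minimal sketch of what that hook could look like, assuming the Salesforce/blip-image-captioning-base checkpoint and an appended trigger-word suffix; both are illustrative choices, not part of the script:

    # Hypothetical BLIP-based replacement for the template captioner.
    from PIL import Image
    from transformers import BlipProcessor, BlipForConditionalGeneration

    processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

    def blip_caption(image_path: str, trigger_word: str) -> str:
        image = Image.open(image_path).convert("RGB")
        inputs = processor(images=image, return_tensors="pt")
        out = model.generate(**inputs, max_new_tokens=30)
        base_caption = processor.decode(out[0], skip_special_tokens=True)
        # Keep the trigger word in every caption so the LoRA still binds the style token
        return f"{base_caption}, in {trigger_word} style"

Per-image captions like these tend to help the LoRA separate content from style, since the style token is then the only constant across otherwise varied captions.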
src/generators/compi_phase1e_lora_training.py
ADDED
@@ -0,0 +1,458 @@
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
CompI Phase 1.E: LoRA Fine-tuning for Personal Style
|
| 4 |
+
|
| 5 |
+
This script implements LoRA (Low-Rank Adaptation) fine-tuning for Stable Diffusion
|
| 6 |
+
to learn your personal artistic style.
|
| 7 |
+
|
| 8 |
+
Usage:
|
| 9 |
+
python src/generators/compi_phase1e_lora_training.py --dataset-dir datasets/my_style
|
| 10 |
+
python src/generators/compi_phase1e_lora_training.py --help
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import os
|
| 14 |
+
import argparse
|
| 15 |
+
import json
|
| 16 |
+
import math
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Dict, List, Optional
|
| 19 |
+
import logging
|
| 20 |
+
|
| 21 |
+
import torch
|
| 22 |
+
import torch.nn.functional as F
|
| 23 |
+
from torch.utils.data import Dataset, DataLoader
|
| 24 |
+
from PIL import Image
|
| 25 |
+
import numpy as np
|
| 26 |
+
from tqdm import tqdm
|
| 27 |
+
|
| 28 |
+
# Diffusers and transformers
|
| 29 |
+
from diffusers import (
|
| 30 |
+
StableDiffusionPipeline,
|
| 31 |
+
UNet2DConditionModel,
|
| 32 |
+
DDPMScheduler,
|
| 33 |
+
AutoencoderKL
|
| 34 |
+
)
|
| 35 |
+
from transformers import CLIPTextModel, CLIPTokenizer
|
| 36 |
+
from peft import LoraConfig, get_peft_model, TaskType
|
| 37 |
+
|
| 38 |
+
# -------- 1. CONFIGURATION --------
|
| 39 |
+
|
| 40 |
+
DEFAULT_MODEL = "runwayml/stable-diffusion-v1-5"
|
| 41 |
+
DEFAULT_RESOLUTION = 512
|
| 42 |
+
DEFAULT_BATCH_SIZE = 1
|
| 43 |
+
DEFAULT_LEARNING_RATE = 1e-4
|
| 44 |
+
DEFAULT_EPOCHS = 100
|
| 45 |
+
DEFAULT_LORA_RANK = 4
|
| 46 |
+
DEFAULT_LORA_ALPHA = 32
|
| 47 |
+
|
| 48 |
+
# -------- 2. DATASET CLASS --------
|
| 49 |
+
|
| 50 |
+
class StyleDataset(Dataset):
|
| 51 |
+
"""Dataset class for LoRA fine-tuning."""
|
| 52 |
+
|
| 53 |
+
def __init__(self, dataset_dir: str, split: str = "train", resolution: int = 512):
|
| 54 |
+
self.dataset_dir = Path(dataset_dir)
|
| 55 |
+
self.split = split
|
| 56 |
+
self.resolution = resolution
|
| 57 |
+
|
| 58 |
+
# Load images and captions
|
| 59 |
+
self.images_dir = self.dataset_dir / split
|
| 60 |
+
self.captions_file = self.dataset_dir / f"{split}_captions.txt"
|
| 61 |
+
|
| 62 |
+
if not self.images_dir.exists():
|
| 63 |
+
raise FileNotFoundError(f"Images directory not found: {self.images_dir}")
|
| 64 |
+
|
| 65 |
+
if not self.captions_file.exists():
|
| 66 |
+
raise FileNotFoundError(f"Captions file not found: {self.captions_file}")
|
| 67 |
+
|
| 68 |
+
# Load captions
|
| 69 |
+
self.image_captions = {}
|
| 70 |
+
with open(self.captions_file, 'r') as f:
|
| 71 |
+
for line in f:
|
| 72 |
+
if ':' in line:
|
| 73 |
+
filename, caption = line.strip().split(':', 1)
|
| 74 |
+
self.image_captions[filename.strip()] = caption.strip()
|
| 75 |
+
|
| 76 |
+
# Get list of images
|
| 77 |
+
self.image_files = [f for f in os.listdir(self.images_dir)
|
| 78 |
+
if f.lower().endswith(('.png', '.jpg', '.jpeg'))]
|
| 79 |
+
|
| 80 |
+
# Filter to only images with captions
|
| 81 |
+
self.image_files = [f for f in self.image_files if f in self.image_captions]
|
| 82 |
+
|
| 83 |
+
print(f"Loaded {len(self.image_files)} images for {split} split")
|
| 84 |
+
|
| 85 |
+
def __len__(self):
|
| 86 |
+
return len(self.image_files)
|
| 87 |
+
|
| 88 |
+
def __getitem__(self, idx):
|
| 89 |
+
filename = self.image_files[idx]
|
| 90 |
+
image_path = self.images_dir / filename
|
| 91 |
+
caption = self.image_captions[filename]
|
| 92 |
+
|
| 93 |
+
# Load and preprocess image
|
| 94 |
+
image = Image.open(image_path).convert('RGB')
|
| 95 |
+
image = image.resize((self.resolution, self.resolution), Image.Resampling.LANCZOS)
|
| 96 |
+
|
| 97 |
+
# Convert to tensor and normalize to [-1, 1]
|
| 98 |
+
image = np.array(image).astype(np.float32) / 255.0
|
| 99 |
+
image = (image - 0.5) / 0.5
|
| 100 |
+
image = torch.from_numpy(image).permute(2, 0, 1)
|
| 101 |
+
|
| 102 |
+
return {
|
| 103 |
+
'pixel_values': image,
|
| 104 |
+
'caption': caption,
|
| 105 |
+
'filename': filename
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
# -------- 3. TRAINING FUNCTIONS --------
|
| 109 |
+
|
| 110 |
+
def setup_args():
|
| 111 |
+
"""Setup command line arguments."""
|
| 112 |
+
parser = argparse.ArgumentParser(
|
| 113 |
+
description="CompI Phase 1.E: LoRA Fine-tuning for Personal Style",
|
| 114 |
+
formatter_class=argparse.RawDescriptionHelpFormatter
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
parser.add_argument("--dataset-dir", required=True,
|
| 118 |
+
help="Directory containing prepared dataset")
|
| 119 |
+
|
| 120 |
+
parser.add_argument("--output-dir",
|
| 121 |
+
help="Output directory for LoRA weights (default: lora_models/{style_name})")
|
| 122 |
+
|
| 123 |
+
parser.add_argument("--model-name", default=DEFAULT_MODEL,
|
| 124 |
+
help=f"Base Stable Diffusion model (default: {DEFAULT_MODEL})")
|
| 125 |
+
|
| 126 |
+
parser.add_argument("--resolution", type=int, default=DEFAULT_RESOLUTION,
|
| 127 |
+
help=f"Training resolution (default: {DEFAULT_RESOLUTION})")
|
| 128 |
+
|
| 129 |
+
parser.add_argument("--batch-size", type=int, default=DEFAULT_BATCH_SIZE,
|
| 130 |
+
help=f"Training batch size (default: {DEFAULT_BATCH_SIZE})")
|
| 131 |
+
|
| 132 |
+
parser.add_argument("--learning-rate", type=float, default=DEFAULT_LEARNING_RATE,
|
| 133 |
+
help=f"Learning rate (default: {DEFAULT_LEARNING_RATE})")
|
| 134 |
+
|
| 135 |
+
parser.add_argument("--epochs", type=int, default=DEFAULT_EPOCHS,
|
| 136 |
+
help=f"Number of training epochs (default: {DEFAULT_EPOCHS})")
|
| 137 |
+
|
| 138 |
+
parser.add_argument("--lora-rank", type=int, default=DEFAULT_LORA_RANK,
|
| 139 |
+
help=f"LoRA rank (default: {DEFAULT_LORA_RANK})")
|
| 140 |
+
|
| 141 |
+
parser.add_argument("--lora-alpha", type=int, default=DEFAULT_LORA_ALPHA,
|
| 142 |
+
help=f"LoRA alpha (default: {DEFAULT_LORA_ALPHA})")
|
| 143 |
+
|
| 144 |
+
parser.add_argument("--save-steps", type=int, default=100,
|
| 145 |
+
help="Save checkpoint every N steps")
|
| 146 |
+
|
| 147 |
+
parser.add_argument("--validation-steps", type=int, default=50,
|
| 148 |
+
help="Run validation every N steps")
|
| 149 |
+
|
| 150 |
+
parser.add_argument("--mixed-precision", action="store_true",
|
| 151 |
+
help="Use mixed precision training")
|
| 152 |
+
|
| 153 |
+
parser.add_argument("--gradient-checkpointing", action="store_true",
|
| 154 |
+
help="Use gradient checkpointing to save memory")
|
| 155 |
+
|
| 156 |
+
return parser.parse_args()
|
| 157 |
+
|
| 158 |
+
def load_models(model_name: str, device: str):
|
| 159 |
+
"""Load Stable Diffusion components."""
|
| 160 |
+
print(f"Loading models from {model_name}...")
|
| 161 |
+
|
| 162 |
+
# Load tokenizer and text encoder
|
| 163 |
+
tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder="tokenizer")
|
| 164 |
+
text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder="text_encoder")
|
| 165 |
+
|
| 166 |
+
# Load VAE
|
| 167 |
+
vae = AutoencoderKL.from_pretrained(model_name, subfolder="vae")
|
| 168 |
+
|
| 169 |
+
# Load UNet
|
| 170 |
+
unet = UNet2DConditionModel.from_pretrained(model_name, subfolder="unet")
|
| 171 |
+
|
| 172 |
+
# Load noise scheduler
|
| 173 |
+
noise_scheduler = DDPMScheduler.from_pretrained(model_name, subfolder="scheduler")
|
| 174 |
+
|
| 175 |
+
# Move to device
|
| 176 |
+
text_encoder.to(device)
|
| 177 |
+
vae.to(device)
|
| 178 |
+
unet.to(device)
|
| 179 |
+
|
| 180 |
+
# Set to eval mode (we only train LoRA adapters)
|
| 181 |
+
text_encoder.eval()
|
| 182 |
+
vae.eval()
|
| 183 |
+
unet.train() # UNet needs to be in train mode for LoRA
|
| 184 |
+
|
| 185 |
+
return tokenizer, text_encoder, vae, unet, noise_scheduler
|
| 186 |
+
|
| 187 |
+
def setup_lora(unet: UNet2DConditionModel, lora_rank: int, lora_alpha: int):
|
| 188 |
+
"""Setup LoRA adapters for UNet."""
|
| 189 |
+
print(f"Setting up LoRA with rank={lora_rank}, alpha={lora_alpha}")
|
| 190 |
+
|
| 191 |
+
# Define LoRA config
|
| 192 |
+
lora_config = LoraConfig(
|
| 193 |
+
r=lora_rank,
|
| 194 |
+
lora_alpha=lora_alpha,
|
| 195 |
+
target_modules=[
|
| 196 |
+
"to_k", "to_q", "to_v", "to_out.0",
|
| 197 |
+
"proj_in", "proj_out",
|
| 198 |
+
"ff.net.0.proj", "ff.net.2"
|
| 199 |
+
],
|
| 200 |
+
lora_dropout=0.1,
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
# Apply LoRA to UNet
|
| 204 |
+
unet = get_peft_model(unet, lora_config)
|
| 205 |
+
|
| 206 |
+
# Print trainable parameters
|
| 207 |
+
trainable_params = sum(p.numel() for p in unet.parameters() if p.requires_grad)
|
| 208 |
+
total_params = sum(p.numel() for p in unet.parameters())
|
| 209 |
+
|
| 210 |
+
print(f"Trainable parameters: {trainable_params:,} ({100 * trainable_params / total_params:.2f}%)")
|
| 211 |
+
|
| 212 |
+
return unet
|
| 213 |
+
|
| 214 |
+
def encode_text(tokenizer, text_encoder, captions: List[str], device: str):
|
| 215 |
+
"""Encode text captions."""
|
| 216 |
+
inputs = tokenizer(
|
| 217 |
+
captions,
|
| 218 |
+
padding="max_length",
|
| 219 |
+
max_length=tokenizer.model_max_length,
|
| 220 |
+
truncation=True,
|
| 221 |
+
return_tensors="pt"
|
| 222 |
+
)
|
| 223 |
+
|
| 224 |
+
with torch.no_grad():
|
| 225 |
+
text_embeddings = text_encoder(inputs.input_ids.to(device))[0]
|
| 226 |
+
|
| 227 |
+
return text_embeddings
|
| 228 |
+
|
| 229 |
+
def training_step(batch, unet, vae, text_encoder, tokenizer, noise_scheduler, device):
|
| 230 |
+
"""Single training step."""
|
| 231 |
+
pixel_values = batch['pixel_values'].to(device)
|
| 232 |
+
captions = batch['caption']
|
| 233 |
+
|
| 234 |
+
# Encode images to latent space
|
| 235 |
+
with torch.no_grad():
|
| 236 |
+
latents = vae.encode(pixel_values).latent_dist.sample()
|
| 237 |
+
latents = latents * vae.config.scaling_factor
|
| 238 |
+
|
| 239 |
+
# Sample noise
|
| 240 |
+
noise = torch.randn_like(latents)
|
| 241 |
+
batch_size = latents.shape[0]
|
| 242 |
+
|
| 243 |
+
# Sample random timesteps
|
| 244 |
+
timesteps = torch.randint(
|
| 245 |
+
0, noise_scheduler.config.num_train_timesteps,
|
| 246 |
+
(batch_size,), device=device
|
| 247 |
+
).long()
|
| 248 |
+
|
| 249 |
+
# Add noise to latents
|
| 250 |
+
noisy_latents = noise_scheduler.add_noise(latents, noise, timesteps)
|
| 251 |
+
|
| 252 |
+
# Encode text
|
| 253 |
+
text_embeddings = encode_text(tokenizer, text_encoder, captions, device)
|
| 254 |
+
|
| 255 |
+
# Predict noise
|
| 256 |
+
noise_pred = unet(noisy_latents, timesteps, text_embeddings).sample
|
| 257 |
+
|
| 258 |
+
# Calculate loss
|
| 259 |
+
loss = F.mse_loss(noise_pred.float(), noise.float(), reduction="mean")
|
| 260 |
+
|
| 261 |
+
return loss
|
| 262 |
+
|
| 263 |
+
def validate_model(val_dataloader, unet, vae, text_encoder, tokenizer, noise_scheduler, device):
|
| 264 |
+
"""Validation step."""
|
| 265 |
+
unet.eval()
|
| 266 |
+
total_loss = 0
|
| 267 |
+
num_batches = 0
|
| 268 |
+
|
| 269 |
+
with torch.no_grad():
|
| 270 |
+
for batch in val_dataloader:
|
| 271 |
+
loss = training_step(batch, unet, vae, text_encoder, tokenizer, noise_scheduler, device)
|
| 272 |
+
total_loss += loss.item()
|
| 273 |
+
num_batches += 1
|
| 274 |
+
|
| 275 |
+
unet.train()
|
| 276 |
+
return total_loss / num_batches if num_batches > 0 else 0
|
| 277 |
+
|
| 278 |
+
def save_lora_weights(unet, output_dir: Path, step: int):
|
| 279 |
+
"""Save LoRA weights."""
|
| 280 |
+
checkpoint_dir = output_dir / f"checkpoint-{step}"
|
| 281 |
+
checkpoint_dir.mkdir(parents=True, exist_ok=True)
|
| 282 |
+
|
| 283 |
+
# Save LoRA weights
|
| 284 |
+
unet.save_pretrained(checkpoint_dir)
|
| 285 |
+
|
| 286 |
+
print(f"💾 Saved checkpoint to: {checkpoint_dir}")
|
| 287 |
+
return checkpoint_dir
|
| 288 |
+
|
| 289 |
+
# -------- 4. MAIN TRAINING FUNCTION --------
|
| 290 |
+
|
| 291 |
+
def train_lora(args):
|
| 292 |
+
"""Main training function."""
|
| 293 |
+
print(f"🎨 CompI Phase 1.E: Starting LoRA Training")
|
| 294 |
+
print("=" * 50)
|
| 295 |
+
|
| 296 |
+
# Setup device
|
| 297 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 298 |
+
print(f"🖥️ Using device: {device}")
|
| 299 |
+
|
| 300 |
+
# Load dataset info
|
| 301 |
+
+    dataset_dir = Path(args.dataset_dir)
+    info_file = dataset_dir / "dataset_info.json"
+
+    if info_file.exists():
+        with open(info_file) as f:
+            dataset_info = json.load(f)
+        style_name = dataset_info.get('style_name', 'custom_style')
+        print(f"🎯 Training style: {style_name}")
+    else:
+        style_name = dataset_dir.name
+        print(f"⚠️ No dataset info found, using directory name: {style_name}")
+
+    # Setup output directory
+    if args.output_dir:
+        output_dir = Path(args.output_dir)
+    else:
+        output_dir = Path("lora_models") / style_name
+
+    output_dir.mkdir(parents=True, exist_ok=True)
+    print(f"📁 Output directory: {output_dir}")
+
+    # Load datasets
+    print(f"📊 Loading datasets...")
+    train_dataset = StyleDataset(args.dataset_dir, "train", args.resolution)
+
+    try:
+        val_dataset = StyleDataset(args.dataset_dir, "validation", args.resolution)
+        has_validation = True
+    except FileNotFoundError:
+        print("⚠️ No validation set found, using train set for validation")
+        val_dataset = train_dataset
+        has_validation = False
+
+    # Create data loaders
+    train_dataloader = DataLoader(
+        train_dataset,
+        batch_size=args.batch_size,
+        shuffle=True,
+        num_workers=2,
+        pin_memory=True
+    )
+
+    val_dataloader = DataLoader(
+        val_dataset,
+        batch_size=args.batch_size,
+        shuffle=False,
+        num_workers=2,
+        pin_memory=True
+    )
+
+    # Load models
+    tokenizer, text_encoder, vae, unet, noise_scheduler = load_models(args.model_name, device)
+
+    # Setup LoRA
+    unet = setup_lora(unet, args.lora_rank, args.lora_alpha)
+
+    # Setup optimizer
+    optimizer = torch.optim.AdamW(
+        unet.parameters(),
+        lr=args.learning_rate,
+        betas=(0.9, 0.999),
+        weight_decay=0.01,
+        eps=1e-08
+    )
+
+    # Calculate total steps
+    total_steps = len(train_dataloader) * args.epochs
+    print(f"📈 Total training steps: {total_steps}")
+
+    # Training loop
+    print(f"\n🚀 Starting training...")
+    global_step = 0
+    best_val_loss = float('inf')
+
+    for epoch in range(args.epochs):
+        print(f"\n📅 Epoch {epoch + 1}/{args.epochs}")
+
+        epoch_loss = 0
+        progress_bar = tqdm(train_dataloader, desc="Training")
+
+        for batch in progress_bar:
+            # Training step
+            loss = training_step(batch, unet, vae, text_encoder, tokenizer, noise_scheduler, device)
+
+            # Backward pass
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+
+            # Update metrics
+            epoch_loss += loss.item()
+            global_step += 1
+
+            # Update progress bar
+            progress_bar.set_postfix({
+                'loss': f"{loss.item():.4f}",
+                'avg_loss': f"{epoch_loss / (progress_bar.n + 1):.4f}"
+            })
+
+            # Validation
+            if global_step % args.validation_steps == 0:
+                val_loss = validate_model(val_dataloader, unet, vae, text_encoder, tokenizer, noise_scheduler, device)
+                print(f"\n📊 Step {global_step}: Train Loss = {loss.item():.4f}, Val Loss = {val_loss:.4f}")
+
+                # Save best model
+                if val_loss < best_val_loss:
+                    best_val_loss = val_loss
+                    save_lora_weights(unet, output_dir, global_step)
+
+            # Save checkpoint
+            if global_step % args.save_steps == 0:
+                save_lora_weights(unet, output_dir, global_step)
+
+        # End of epoch
+        avg_epoch_loss = epoch_loss / len(train_dataloader)
+        print(f"📊 Epoch {epoch + 1} complete. Average loss: {avg_epoch_loss:.4f}")
+
+    # Save final model
+    final_checkpoint = save_lora_weights(unet, output_dir, global_step)
+
+    # Save training info
+    training_info = {
+        'style_name': style_name,
+        'model_name': args.model_name,
+        'total_steps': global_step,
+        'epochs': args.epochs,
+        'learning_rate': args.learning_rate,
+        'lora_rank': args.lora_rank,
+        'lora_alpha': args.lora_alpha,
+        'final_checkpoint': str(final_checkpoint),
+        'best_val_loss': best_val_loss
+    }
+
+    with open(output_dir / "training_info.json", 'w') as f:
+        json.dump(training_info, f, indent=2)
+
+    print(f"\n🎉 Training complete!")
+    print(f"📁 LoRA weights saved to: {output_dir}")
+    print(f"💡 Next steps:")
+    print(f"   1. Test your style: python src/generators/compi_phase1e_style_generation.py --lora-path {final_checkpoint}")
+    print(f"   2. Integrate with UI: Use the style in your Streamlit interface")
+
+def main():
+    """Main function."""
+    args = setup_args()
+
+    try:
+        train_lora(args)
+    except Exception as e:
+        print(f"❌ Training failed: {e}")
+        import traceback
+        traceback.print_exc()
+        return 1
+
+    return 0
+
+if __name__ == "__main__":
+    exit(main())
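Note: the training_info.json written above is the contract between this trainer and the generation and management scripts that follow. A sketch of its shape, with illustrative values only (not taken from a real run):

{
  "style_name": "my_style",
  "model_name": "runwayml/stable-diffusion-v1-5",
  "total_steps": 1000,
  "epochs": 10,
  "learning_rate": 1e-4,
  "lora_rank": 4,
  "lora_alpha": 32,
  "final_checkpoint": "lora_models/my_style/checkpoint-1000",
  "best_val_loss": 0.1234
}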
src/generators/compi_phase1e_style_generation.py
ADDED
@@ -0,0 +1,406 @@
+#!/usr/bin/env python3
+"""
+CompI Phase 1.E: Personal Style Generation with LoRA
+
+Generate images using your trained LoRA personal style weights.
+
+Usage:
+    python src/generators/compi_phase1e_style_generation.py --lora-path lora_models/my_style/checkpoint-1000
+    python src/generators/compi_phase1e_style_generation.py --help
+"""
+
+import os
+import argparse
+import json
+from datetime import datetime
+from pathlib import Path
+from typing import Optional, List
+
+import torch
+from PIL import Image
+from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
+from peft import PeftModel
+
+# -------- 1. CONFIGURATION --------
+
+DEFAULT_MODEL = "runwayml/stable-diffusion-v1-5"
+DEFAULT_STEPS = 30
+DEFAULT_GUIDANCE = 7.5
+DEFAULT_WIDTH = 512
+DEFAULT_HEIGHT = 512
+OUTPUT_DIR = "outputs"
+
+# -------- 2. UTILITY FUNCTIONS --------
+
+def setup_args():
+    """Setup command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="CompI Phase 1.E: Personal Style Generation with LoRA",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Generate with trained LoRA style
+  python %(prog)s --lora-path lora_models/my_style/checkpoint-1000 "a cat in my_style"
+
+  # Interactive mode
+  python %(prog)s --lora-path lora_models/my_style/checkpoint-1000 --interactive
+
+  # Multiple variations
+  python %(prog)s --lora-path lora_models/my_style/checkpoint-1000 "landscape" --variations 4
+        """
+    )
+
+    parser.add_argument("prompt", nargs="*", help="Text prompt for generation")
+
+    parser.add_argument("--lora-path", required=True,
+                        help="Path to trained LoRA checkpoint directory")
+
+    parser.add_argument("--model-name", default=DEFAULT_MODEL,
+                        help=f"Base Stable Diffusion model (default: {DEFAULT_MODEL})")
+
+    parser.add_argument("--variations", "-v", type=int, default=1,
+                        help="Number of variations to generate")
+
+    parser.add_argument("--steps", type=int, default=DEFAULT_STEPS,
+                        help=f"Number of inference steps (default: {DEFAULT_STEPS})")
+
+    parser.add_argument("--guidance", type=float, default=DEFAULT_GUIDANCE,
+                        help=f"Guidance scale (default: {DEFAULT_GUIDANCE})")
+
+    parser.add_argument("--width", type=int, default=DEFAULT_WIDTH,
+                        help=f"Image width (default: {DEFAULT_WIDTH})")
+
+    parser.add_argument("--height", type=int, default=DEFAULT_HEIGHT,
+                        help=f"Image height (default: {DEFAULT_HEIGHT})")
+
+    parser.add_argument("--seed", type=int,
+                        help="Random seed for reproducible generation")
+
+    parser.add_argument("--negative", "-n", default="",
+                        help="Negative prompt")
+
+    parser.add_argument("--lora-scale", type=float, default=1.0,
+                        help="LoRA scale factor (0.0-2.0, default: 1.0)")
+
+    parser.add_argument("--interactive", "-i", action="store_true",
+                        help="Interactive mode")
+
+    parser.add_argument("--output-dir", default=OUTPUT_DIR,
+                        help=f"Output directory (default: {OUTPUT_DIR})")
+
+    parser.add_argument("--list-styles", action="store_true",
+                        help="List available LoRA styles")
+
+    return parser.parse_args()
+
+def load_lora_info(lora_path: str) -> dict:
+    """Load LoRA training information."""
+    lora_dir = Path(lora_path)
+
+    # Try to find training info
+    info_files = [
+        lora_dir / "training_info.json",
+        lora_dir.parent / "training_info.json"
+    ]
+
+    for info_file in info_files:
+        if info_file.exists():
+            with open(info_file) as f:
+                return json.load(f)
+
+    # Fallback info
+    return {
+        'style_name': lora_dir.parent.name,
+        'model_name': DEFAULT_MODEL,
+        'lora_rank': 4,
+        'lora_alpha': 32
+    }
+
+def load_pipeline_with_lora(model_name: str, lora_path: str, device: str):
+    """Load Stable Diffusion pipeline with LoRA weights."""
+    print(f"🔄 Loading base model: {model_name}")
+
+    # Load base pipeline
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_name,
+        torch_dtype=torch.float16 if device == "cuda" else torch.float32,
+        safety_checker=None,
+        requires_safety_checker=False
+    )
+
+    # Use DPM solver for faster inference
+    pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
+
+    print(f"🎨 Loading LoRA weights from: {lora_path}")
+
+    # Load LoRA weights
+    lora_dir = Path(lora_path)
+    if not lora_dir.exists():
+        raise FileNotFoundError(f"LoRA path not found: {lora_path}")
+
+    # Apply LoRA to UNet
+    pipe.unet = PeftModel.from_pretrained(pipe.unet, lora_path)
+
+    # Move to device
+    pipe = pipe.to(device)
+
+    # Enable memory efficient attention if available
+    if hasattr(pipe, "enable_xformers_memory_efficient_attention"):
+        try:
+            pipe.enable_xformers_memory_efficient_attention()
+        except Exception:
+            pass
+
+    return pipe
+
+def generate_with_style(
+    pipe,
+    prompt: str,
+    negative_prompt: str = "",
+    num_inference_steps: int = DEFAULT_STEPS,
+    guidance_scale: float = DEFAULT_GUIDANCE,
+    width: int = DEFAULT_WIDTH,
+    height: int = DEFAULT_HEIGHT,
+    seed: Optional[int] = None,
+    lora_scale: float = 1.0
+):
+    """Generate image with LoRA style."""
+
+    # Set LoRA scale
+    if hasattr(pipe.unet, 'set_adapter_scale'):
+        pipe.unet.set_adapter_scale(lora_scale)
+
+    # Setup generator
+    if seed is not None:
+        generator = torch.Generator(device=pipe.device).manual_seed(seed)
+    else:
+        generator = None
+        seed = torch.seed()
+
+    # Generate image
+    with torch.autocast(pipe.device.type):
+        result = pipe(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=num_inference_steps,
+            guidance_scale=guidance_scale,
+            width=width,
+            height=height,
+            generator=generator
+        )
+
+    return result.images[0], seed
+
+def save_generated_image(
+    image: Image.Image,
+    prompt: str,
+    style_name: str,
+    seed: int,
+    variation: int,
+    output_dir: str,
+    metadata: dict = None
+):
+    """Save generated image with metadata."""
+
+    # Create output directory
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Generate filename
+    prompt_slug = "_".join(prompt.lower().split()[:5])
+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    filename = f"{prompt_slug[:25]}_lora_{style_name}_{timestamp}_seed{seed}_v{variation}.png"
+    filepath = os.path.join(output_dir, filename)
+
+    # Save image
+    image.save(filepath)
+
+    # Save metadata if provided
+    if metadata:
+        metadata_file = filepath.replace('.png', '_metadata.json')
+        with open(metadata_file, 'w') as f:
+            json.dump(metadata, f, indent=2)
+
+    return filepath
+
+def list_available_styles():
+    """List available LoRA styles."""
+    lora_dir = Path("lora_models")
+
+    if not lora_dir.exists():
+        print("❌ No LoRA models directory found")
+        return
+
+    print("🎨 Available LoRA Styles:")
+    print("=" * 40)
+
+    styles_found = False
+    for style_dir in lora_dir.iterdir():
+        if style_dir.is_dir():
+            # Look for checkpoints
+            checkpoints = list(style_dir.glob("checkpoint-*"))
+            if checkpoints:
+                styles_found = True
+                latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split('-')[1]))
+
+                # Load info if available
+                info_file = style_dir / "training_info.json"
+                if info_file.exists():
+                    with open(info_file) as f:
+                        info = json.load(f)
+                    print(f"📁 {style_dir.name}")
+                    print(f"   Latest: {latest_checkpoint.name}")
+                    print(f"   Steps: {info.get('total_steps', 'unknown')}")
+                    print(f"   Model: {info.get('model_name', 'unknown')}")
+                else:
+                    print(f"📁 {style_dir.name}")
+                    print(f"   Latest: {latest_checkpoint.name}")
+                print()
+
+    if not styles_found:
+        print("❌ No trained LoRA styles found")
+        print("💡 Train a style first using: python src/generators/compi_phase1e_lora_training.py")
+
+def interactive_generation(pipe, lora_info: dict, args):
+    """Interactive generation mode."""
+    style_name = lora_info.get('style_name', 'custom')
+
+    print(f"🎨 Interactive LoRA Style Generation - {style_name}")
+    print("=" * 50)
+    print("💡 Tips:")
+    print(f"   - Include '{style_name}' or trigger words in your prompts")
+    print(f"   - Adjust LoRA scale (0.0-2.0) to control style strength")
+    print("   - Type 'quit' to exit")
+    print()
+
+    while True:
+        try:
+            # Get prompt
+            prompt = input("Enter prompt: ").strip()
+            if not prompt or prompt.lower() == 'quit':
+                break
+
+            # Get optional parameters
+            variations = input("Variations (default: 1): ").strip()
+            variations = int(variations) if variations.isdigit() else 1
+
+            lora_scale = input(f"LoRA scale (default: {args.lora_scale}): ").strip()
+            lora_scale = float(lora_scale) if lora_scale else args.lora_scale
+
+            # Generate images
+            print(f"🎨 Generating {variations} variation(s)...")
+
+            for i in range(variations):
+                image, seed = generate_with_style(
+                    pipe, prompt, args.negative,
+                    args.steps, args.guidance,
+                    args.width, args.height,
+                    args.seed, lora_scale
+                )
+
+                # Save image
+                filepath = save_generated_image(
+                    image, prompt, style_name, seed, i + 1, args.output_dir,
+                    {
+                        'prompt': prompt,
+                        'negative_prompt': args.negative,
+                        'style_name': style_name,
+                        'lora_scale': lora_scale,
+                        'seed': seed,
+                        'steps': args.steps,
+                        'guidance_scale': args.guidance,
+                        'timestamp': datetime.now().isoformat()
+                    }
+                )
+
+                print(f"✅ Saved: {filepath}")
+
+            print()
+
+        except KeyboardInterrupt:
+            break
+        except Exception as e:
+            print(f"❌ Error: {e}")
+            print()
+
+def main():
+    """Main function."""
+    args = setup_args()
+
+    # List styles if requested
+    if args.list_styles:
+        list_available_styles()
+        return 0
+
+    # Check LoRA path
+    if not os.path.exists(args.lora_path):
+        print(f"❌ LoRA path not found: {args.lora_path}")
+        return 1
+
+    # Load LoRA info
+    lora_info = load_lora_info(args.lora_path)
+    style_name = lora_info.get('style_name', 'custom')
+
+    print(f"🎨 CompI Phase 1.E: Personal Style Generation")
+    print(f"Style: {style_name}")
+    print("=" * 50)
+
+    # Setup device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"🖥️ Using device: {device}")
+
+    # Load pipeline
+    try:
+        pipe = load_pipeline_with_lora(args.model_name, args.lora_path, device)
+        print("✅ Pipeline loaded successfully")
+    except Exception as e:
+        print(f"❌ Failed to load pipeline: {e}")
+        return 1
+
+    # Interactive mode
+    if args.interactive:
+        interactive_generation(pipe, lora_info, args)
+        return 0
+
+    # Command line mode
+    prompt = " ".join(args.prompt) if args.prompt else input("Enter prompt: ").strip()
+    if not prompt:
+        print("❌ No prompt provided")
+        return 1
+
+    print(f"🎨 Generating {args.variations} variation(s) for: {prompt}")
+
+    # Generate images
+    for i in range(args.variations):
+        try:
+            image, seed = generate_with_style(
+                pipe, prompt, args.negative,
+                args.steps, args.guidance,
+                args.width, args.height,
+                args.seed, args.lora_scale
+            )
+
+            # Save image
+            filepath = save_generated_image(
+                image, prompt, style_name, seed, i + 1, args.output_dir,
+                {
+                    'prompt': prompt,
+                    'negative_prompt': args.negative,
+                    'style_name': style_name,
+                    'lora_scale': args.lora_scale,
+                    'seed': seed,
+                    'steps': args.steps,
+                    'guidance_scale': args.guidance,
+                    'timestamp': datetime.now().isoformat()
+                }
+            )
+
+            print(f"✅ Generated variation {i + 1}: {filepath}")
+
+        except Exception as e:
+            print(f"❌ Error generating variation {i + 1}: {e}")
+
+    print("🎉 Generation complete!")
+    return 0
+
+if __name__ == "__main__":
+    exit(main())
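The CLI above can also be bypassed and the helpers called directly from another script. A minimal sketch, assuming it runs from the repository root; the sys.path manipulation and checkpoint path are illustrative:

import sys
sys.path.append("src/generators")

from compi_phase1e_style_generation import load_pipeline_with_lora, generate_with_style

# Load the base model and attach a trained LoRA checkpoint
pipe = load_pipeline_with_lora(
    "runwayml/stable-diffusion-v1-5",
    "lora_models/my_style/checkpoint-1000",
    "cuda",
)

# Returns the image plus the seed actually used, for reproducibility
image, seed = generate_with_style(pipe, "a cat in my_style", lora_scale=0.8)
image.save(f"cat_seed{seed}.png")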
src/generators/compi_phase1e_style_manager.py
ADDED
@@ -0,0 +1,386 @@
+#!/usr/bin/env python3
+"""
+CompI Phase 1.E: LoRA Style Management System
+
+Manage multiple LoRA styles, switch between them, and organize trained models.
+
+Usage:
+    python src/generators/compi_phase1e_style_manager.py --list
+    python src/generators/compi_phase1e_style_manager.py --info my_style
+    python src/generators/compi_phase1e_style_manager.py --cleanup
+"""
+
+import os
+import argparse
+import json
+import shutil
+from datetime import datetime
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+import pandas as pd
+
+# -------- 1. CONFIGURATION --------
+
+LORA_MODELS_DIR = "lora_models"
+STYLES_CONFIG_FILE = "lora_styles_config.json"
+
+# -------- 2. STYLE MANAGEMENT CLASS --------
+
+class LoRAStyleManager:
+    """Manager for LoRA styles and models."""
+
+    def __init__(self, models_dir: str = LORA_MODELS_DIR):
+        self.models_dir = Path(models_dir)
+        self.models_dir.mkdir(exist_ok=True)
+        self.config_file = self.models_dir / STYLES_CONFIG_FILE
+        self.config = self.load_config()
+
+    def load_config(self) -> Dict:
+        """Load styles configuration."""
+        if self.config_file.exists():
+            with open(self.config_file) as f:
+                return json.load(f)
+        return {"styles": {}, "last_updated": datetime.now().isoformat()}
+
+    def save_config(self):
+        """Save styles configuration."""
+        self.config["last_updated"] = datetime.now().isoformat()
+        with open(self.config_file, 'w') as f:
+            json.dump(self.config, f, indent=2)
+
+    def scan_styles(self) -> Dict[str, Dict]:
+        """Scan for available LoRA styles."""
+        styles = {}
+
+        for style_dir in self.models_dir.iterdir():
+            if not style_dir.is_dir() or style_dir.name.startswith('.'):
+                continue
+
+            # Look for checkpoints
+            checkpoints = list(style_dir.glob("checkpoint-*"))
+            if not checkpoints:
+                continue
+
+            # Get latest checkpoint
+            latest_checkpoint = max(checkpoints, key=lambda x: int(x.name.split('-')[1]))
+
+            # Load training info
+            info_file = style_dir / "training_info.json"
+            if info_file.exists():
+                with open(info_file) as f:
+                    training_info = json.load(f)
+            else:
+                training_info = {}
+
+            # Load dataset info if available
+            dataset_info = {}
+            for dataset_dir in [style_dir / "dataset", Path("datasets") / style_dir.name]:
+                dataset_info_file = dataset_dir / "dataset_info.json"
+                if dataset_info_file.exists():
+                    with open(dataset_info_file) as f:
+                        dataset_info = json.load(f)
+                    break
+
+            # Compile style information
+            style_info = {
+                "name": style_dir.name,
+                "path": str(style_dir),
+                "latest_checkpoint": str(latest_checkpoint),
+                "checkpoints": [str(cp) for cp in checkpoints],
+                "training_info": training_info,
+                "dataset_info": dataset_info,
+                "last_scanned": datetime.now().isoformat()
+            }
+
+            styles[style_dir.name] = style_info
+
+        return styles
+
+    def refresh_styles(self):
+        """Refresh the styles database."""
+        print("🔄 Scanning for LoRA styles...")
+        scanned_styles = self.scan_styles()
+
+        # Update config
+        self.config["styles"] = scanned_styles
+        self.save_config()
+
+        print(f"✅ Found {len(scanned_styles)} LoRA style(s)")
+        return scanned_styles
+
+    def list_styles(self, detailed: bool = False) -> List[Dict]:
+        """List available styles."""
+        styles = self.config.get("styles", {})
+
+        if not styles:
+            styles = self.refresh_styles()
+
+        if detailed:
+            return list(styles.values())
+        else:
+            return [{"name": name, "checkpoints": len(info["checkpoints"])}
+                    for name, info in styles.items()]
+
+    def get_style_info(self, style_name: str) -> Optional[Dict]:
+        """Get detailed information about a specific style."""
+        styles = self.config.get("styles", {})
+        return styles.get(style_name)
+
+    def get_best_checkpoint(self, style_name: str) -> Optional[str]:
+        """Get the best checkpoint for a style."""
+        style_info = self.get_style_info(style_name)
+        if not style_info:
+            return None
+
+        # For now, return the latest checkpoint
+        # Could be enhanced to track validation loss and return best performing
+        return style_info.get("latest_checkpoint")
+
+    def delete_style(self, style_name: str, confirm: bool = False) -> bool:
+        """Delete a LoRA style."""
+        if not confirm:
+            print("⚠️ Use --confirm to actually delete the style")
+            return False
+
+        style_dir = self.models_dir / style_name
+        if not style_dir.exists():
+            print(f"❌ Style not found: {style_name}")
+            return False
+
+        try:
+            shutil.rmtree(style_dir)
+
+            # Remove from config
+            if style_name in self.config.get("styles", {}):
+                del self.config["styles"][style_name]
+                self.save_config()
+
+            print(f"✅ Deleted style: {style_name}")
+            return True
+
+        except Exception as e:
+            print(f"❌ Error deleting style: {e}")
+            return False
+
+    def cleanup_checkpoints(self, style_name: str, keep_last: int = 3) -> int:
+        """Clean up old checkpoints, keeping only the most recent ones."""
+        style_dir = self.models_dir / style_name
+        if not style_dir.exists():
+            print(f"❌ Style not found: {style_name}")
+            return 0
+
+        checkpoints = list(style_dir.glob("checkpoint-*"))
+        if len(checkpoints) <= keep_last:
+            print(f"✅ No cleanup needed for {style_name} ({len(checkpoints)} checkpoints)")
+            return 0
+
+        # Sort by step number
+        checkpoints.sort(key=lambda x: int(x.name.split('-')[1]))
+
+        # Remove old checkpoints
+        to_remove = checkpoints[:-keep_last]
+        removed_count = 0
+
+        for checkpoint in to_remove:
+            try:
+                shutil.rmtree(checkpoint)
+                removed_count += 1
+            except Exception as e:
+                print(f"⚠️ Failed to remove {checkpoint}: {e}")
+
+        print(f"✅ Cleaned up {removed_count} old checkpoints for {style_name}")
+        return removed_count
+
+    def export_style_info(self, output_file: str = None) -> str:
+        """Export styles information to CSV."""
+        styles = self.list_styles(detailed=True)
+
+        if not styles:
+            print("❌ No styles found")
+            return ""
+
+        # Prepare data for CSV
+        rows = []
+        for style in styles:
+            training_info = style.get("training_info", {})
+            dataset_info = style.get("dataset_info", {})
+
+            row = {
+                "style_name": style["name"],
+                "checkpoints": len(style["checkpoints"]),
+                "latest_checkpoint": Path(style["latest_checkpoint"]).name,
+                "total_steps": training_info.get("total_steps", "unknown"),
+                "epochs": training_info.get("epochs", "unknown"),
+                "learning_rate": training_info.get("learning_rate", "unknown"),
+                "lora_rank": training_info.get("lora_rank", "unknown"),
+                "dataset_images": dataset_info.get("total_images", "unknown"),
+                "trigger_word": dataset_info.get("trigger_word", "unknown"),
+                "last_scanned": style.get("last_scanned", "unknown")
+            }
+            rows.append(row)
+
+        # Create DataFrame and save
+        df = pd.DataFrame(rows)
+
+        if output_file is None:
+            output_file = f"lora_styles_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
+
+        df.to_csv(output_file, index=False)
+        print(f"📊 Exported styles info to: {output_file}")
+        return output_file
+
+# -------- 3. COMMAND LINE INTERFACE --------
+
+def setup_args():
+    """Setup command line arguments."""
+    parser = argparse.ArgumentParser(
+        description="CompI Phase 1.E: LoRA Style Management",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # List all available styles
+  python %(prog)s --list
+
+  # Get detailed info about a specific style
+  python %(prog)s --info my_style
+
+  # Refresh styles database
+  python %(prog)s --refresh
+
+  # Clean up old checkpoints
+  python %(prog)s --cleanup my_style --keep 2
+
+  # Export styles information
+  python %(prog)s --export styles_report.csv
+        """
+    )
+
+    parser.add_argument("--list", action="store_true",
+                        help="List all available LoRA styles")
+
+    parser.add_argument("--list-detailed", action="store_true",
+                        help="List styles with detailed information")
+
+    parser.add_argument("--info", metavar="STYLE_NAME",
+                        help="Show detailed information about a specific style")
+
+    parser.add_argument("--refresh", action="store_true",
+                        help="Refresh the styles database")
+
+    parser.add_argument("--cleanup", metavar="STYLE_NAME",
+                        help="Clean up old checkpoints for a style")
+
+    parser.add_argument("--keep", type=int, default=3,
+                        help="Number of recent checkpoints to keep during cleanup")
+
+    parser.add_argument("--delete", metavar="STYLE_NAME",
+                        help="Delete a LoRA style")
+
+    parser.add_argument("--confirm", action="store_true",
+                        help="Confirm destructive operations")
+
+    parser.add_argument("--export", metavar="OUTPUT_FILE",
+                        help="Export styles information to CSV")
+
+    parser.add_argument("--models-dir", default=LORA_MODELS_DIR,
+                        help=f"LoRA models directory (default: {LORA_MODELS_DIR})")
+
+    return parser.parse_args()
+
+def print_style_info(style_info: Dict):
+    """Print detailed style information."""
+    print(f"🎨 Style: {style_info['name']}")
+    print("=" * 40)
+
+    # Basic info
+    print(f"📁 Path: {style_info['path']}")
+    print(f"📊 Checkpoints: {len(style_info['checkpoints'])}")
+    print(f"🏆 Latest: {Path(style_info['latest_checkpoint']).name}")
+
+    # Training info
+    training_info = style_info.get("training_info", {})
+    if training_info:
+        print(f"\n🚀 Training Information:")
+        print(f"   Steps: {training_info.get('total_steps', 'unknown')}")
+        print(f"   Epochs: {training_info.get('epochs', 'unknown')}")
+        print(f"   Learning Rate: {training_info.get('learning_rate', 'unknown')}")
+        print(f"   LoRA Rank: {training_info.get('lora_rank', 'unknown')}")
+        print(f"   LoRA Alpha: {training_info.get('lora_alpha', 'unknown')}")
+
+    # Dataset info
+    dataset_info = style_info.get("dataset_info", {})
+    if dataset_info:
+        print(f"\n📊 Dataset Information:")
+        print(f"   Total Images: {dataset_info.get('total_images', 'unknown')}")
+        print(f"   Train Images: {dataset_info.get('train_images', 'unknown')}")
+        print(f"   Validation Images: {dataset_info.get('validation_images', 'unknown')}")
+        print(f"   Trigger Word: {dataset_info.get('trigger_word', 'unknown')}")
+        print(f"   Image Size: {dataset_info.get('image_size', 'unknown')}")
+
+    print(f"\n🕒 Last Scanned: {style_info.get('last_scanned', 'unknown')}")
+
+def main():
+    """Main function."""
+    args = setup_args()
+
+    # Initialize style manager
+    manager = LoRAStyleManager(args.models_dir)
+
+    print("🎨 CompI Phase 1.E: LoRA Style Manager")
+    print("=" * 40)
+
+    # Execute commands
+    if args.refresh:
+        manager.refresh_styles()
+
+    elif args.list or args.list_detailed:
+        styles = manager.list_styles(detailed=args.list_detailed)
+
+        if not styles:
+            print("❌ No LoRA styles found")
+            print("💡 Train a style first using: python src/generators/compi_phase1e_lora_training.py")
+        else:
+            print(f"📋 Available LoRA Styles ({len(styles)}):")
+            print("-" * 40)
+
+            if args.list_detailed:
+                for style in styles:
+                    print_style_info(style)
+                    print()
+            else:
+                for style in styles:
+                    print(f"🎨 {style['name']} ({style['checkpoints']} checkpoints)")
+
+    elif args.info:
+        style_info = manager.get_style_info(args.info)
+        if style_info:
+            print_style_info(style_info)
+        else:
+            print(f"❌ Style not found: {args.info}")
+            print("💡 Use --list to see available styles")
+
+    elif args.cleanup:
+        removed = manager.cleanup_checkpoints(args.cleanup, args.keep)
+        if removed > 0:
+            manager.refresh_styles()
+
+    elif args.delete:
+        manager.delete_style(args.delete, args.confirm)
+        if args.confirm:
+            manager.refresh_styles()
+
+    elif args.export:
+        manager.export_style_info(args.export)
+
+    else:
+        print("❓ No command specified. Use --help for usage information.")
+        print("💡 Common commands:")
+        print("   --list              List available styles")
+        print("   --info STYLE_NAME   Show style details")
+        print("   --refresh           Refresh styles database")
+
+    return 0
+
+if __name__ == "__main__":
+    exit(main())
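Other scripts can use the manager class directly instead of the CLI, for example to resolve a checkpoint path before generation. A minimal sketch under the same repo-root assumption as the earlier example:

import sys
sys.path.append("src/generators")

from compi_phase1e_style_manager import LoRAStyleManager

manager = LoRAStyleManager("lora_models")
manager.refresh_styles()              # rescan lora_models/ and persist the config JSON

for style in manager.list_styles():   # summary view: name plus checkpoint count
    print(style["name"], style["checkpoints"])

# Currently an alias for the latest checkpoint (see get_best_checkpoint above)
checkpoint = manager.get_best_checkpoint("my_style")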
|
src/generators/compi_phase2a_audio_to_image.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
+"""
+CompI Phase 2.A: Audio-to-Image Generation
+
+This module implements multimodal AI art generation that combines:
+- Text prompts with style and mood conditioning
+- Audio analysis and feature extraction
+- Audio-to-text captioning
+- Intelligent prompt fusion for enhanced creativity
+
+Features:
+- Support for various audio formats (mp3, wav, flac, etc.)
+- Real-time audio analysis with tempo, energy, and spectral features
+- OpenAI Whisper integration for audio captioning
+- Comprehensive metadata logging and filename conventions
+- Batch processing capabilities
+"""
+
+import os
+import sys
+import torch
+import json
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple, Union
+from pathlib import Path
+import logging
+
+# Add project root to path
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+
+from diffusers import StableDiffusionPipeline
+from PIL import Image
+import numpy as np
+
+from src.utils.audio_utils import AudioProcessor, AudioCaptioner, MultimodalPromptFusion, AudioFeatures
+from src.utils.logging_utils import setup_logger
+from src.utils.file_utils import ensure_directory_exists, generate_filename
+
+# Setup logging
+logger = setup_logger(__name__)
+
+class CompIPhase2AAudioToImage:
+    """
+    CompI Phase 2.A: Audio-to-Image Generation System
+
+    Combines text prompts with audio analysis to generate contextually rich AI art
+    """
+
+    def __init__(
+        self,
+        model_name: str = "runwayml/stable-diffusion-v1-5",
+        device: str = "auto",
+        output_dir: str = "outputs",
+        whisper_model: str = "base"
+    ):
+        """
+        Initialize the audio-to-image generation system
+
+        Args:
+            model_name: Stable Diffusion model to use
+            device: Device for inference (auto, cpu, cuda)
+            output_dir: Directory for saving generated images
+            whisper_model: Whisper model size for audio captioning
+        """
+        self.model_name = model_name
+        self.device = self._setup_device(device)
+        self.output_dir = Path(output_dir)
+        ensure_directory_exists(self.output_dir)
+
+        # Initialize components
+        self.pipe = None
+        self.audio_processor = AudioProcessor()
+        self.audio_captioner = AudioCaptioner(model_size=whisper_model, device=self.device)
+        self.prompt_fusion = MultimodalPromptFusion()
+
+        logger.info(f"Initialized CompI Phase 2.A on {self.device}")
+
+    def _setup_device(self, device: str) -> str:
+        """Setup and validate device"""
+        if device == "auto":
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        if device == "cuda" and not torch.cuda.is_available():
+            logger.warning("CUDA requested but not available, falling back to CPU")
+            device = "cpu"
+
+        return device
+
+    def _load_pipeline(self):
+        """Lazy load the Stable Diffusion pipeline"""
+        if self.pipe is None:
+            logger.info(f"Loading Stable Diffusion model: {self.model_name}")
+
+            # Custom safety checker that allows creative content
+            def dummy_safety_checker(images, **kwargs):
+                return images, [False] * len(images)
+
+            self.pipe = StableDiffusionPipeline.from_pretrained(
+                self.model_name,
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                safety_checker=dummy_safety_checker,
+                requires_safety_checker=False
+            )
+
+            self.pipe = self.pipe.to(self.device)
+            self.pipe.enable_attention_slicing()
+
+            if self.device == "cuda":
+                self.pipe.enable_model_cpu_offload()
+
+            logger.info("Stable Diffusion pipeline loaded successfully")
+
+    def analyze_audio(self, audio_path: str, include_caption: bool = True) -> Tuple[AudioFeatures, str]:
+        """
+        Comprehensive audio analysis
+
+        Args:
+            audio_path: Path to audio file
+            include_caption: Whether to generate audio caption
+
+        Returns:
+            Tuple of (AudioFeatures, audio_caption)
+        """
+        logger.info(f"Analyzing audio: {audio_path}")
+
+        # Extract audio features
+        audio_features = self.audio_processor.analyze_audio_file(audio_path)
+
+        # Generate audio caption if requested
+        audio_caption = ""
+        if include_caption:
+            try:
+                audio_caption = self.audio_captioner.caption_audio(audio_path)
+            except Exception as e:
+                logger.warning(f"Audio captioning failed: {e}")
+                audio_caption = ""
+
+        return audio_features, audio_caption
+
+    def generate_image(
+        self,
+        text_prompt: str,
+        style: str = "",
+        mood: str = "",
+        audio_path: Optional[str] = None,
+        num_images: int = 1,
+        height: int = 512,
+        width: int = 512,
+        num_inference_steps: int = 30,
+        guidance_scale: float = 7.5,
+        seed: Optional[int] = None
+    ) -> List[Dict]:
+        """
+        Generate images with optional audio conditioning
+
+        Args:
+            text_prompt: Base text prompt
+            style: Art style
+            mood: Mood/atmosphere
+            audio_path: Optional path to audio file for conditioning
+            num_images: Number of images to generate
+            height: Image height
+            width: Image width
+            num_inference_steps: Number of diffusion steps
+            guidance_scale: Guidance scale for generation
+            seed: Random seed for reproducibility
+
+        Returns:
+            List of generation results with metadata
+        """
+        self._load_pipeline()
+
+        # Analyze audio if provided
+        audio_features = None
+        audio_caption = ""
+        if audio_path and os.path.exists(audio_path):
+            audio_features, audio_caption = self.analyze_audio(audio_path)
+
+        # Create enhanced prompt
+        if audio_features:
+            enhanced_prompt = self.prompt_fusion.fuse_prompt_with_audio(
+                text_prompt, style, mood, audio_features, audio_caption
+            )
+        else:
+            enhanced_prompt = text_prompt
+            if style:
+                enhanced_prompt += f", {style}"
+            if mood:
+                enhanced_prompt += f", {mood}"
+
+        logger.info(f"Generating {num_images} image(s) with prompt: {enhanced_prompt}")
+
+        results = []
+
+        for i in range(num_images):
+            # Set up generation parameters
+            current_seed = seed if seed is not None else torch.seed()
+            generator = torch.Generator(device=self.device).manual_seed(current_seed)
+
+            # Generate image
+            with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
+                result = self.pipe(
+                    enhanced_prompt,
+                    height=height,
+                    width=width,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    generator=generator
+                )
+
+            image = result.images[0]
+
+            # Create metadata
+            metadata = {
+                "timestamp": datetime.now().isoformat(),
+                "text_prompt": text_prompt,
+                "style": style,
+                "mood": mood,
+                "enhanced_prompt": enhanced_prompt,
+                "audio_path": audio_path,
+                "audio_caption": audio_caption,
+                "generation_params": {
+                    "height": height,
+                    "width": width,
+                    "num_inference_steps": num_inference_steps,
+                    "guidance_scale": guidance_scale,
+                    "seed": current_seed,
+                    "model": self.model_name
+                },
+                "device": self.device,
+                "phase": "2A_audio_to_image"
+            }
+
+            # Add audio features to metadata
+            if audio_features:
+                metadata["audio_features"] = audio_features.to_dict()
+                metadata["audio_tags"] = self.prompt_fusion.generate_audio_tags(audio_features)
+
+            # Generate filename
+            filename = self._generate_filename(
+                text_prompt, style, mood, current_seed, i + 1,
+                has_audio=audio_path is not None
+            )
+
+            # Save image and metadata
+            image_path = self.output_dir / f"{filename}.png"
+            metadata_path = self.output_dir / f"{filename}_metadata.json"
+
+            image.save(image_path)
+            with open(metadata_path, 'w') as f:
+                json.dump(metadata, f, indent=2)
+
+            results.append({
+                "image": image,
+                "image_path": str(image_path),
+                "metadata_path": str(metadata_path),
+                "metadata": metadata,
+                "filename": filename
+            })
+
+            logger.info(f"Generated image {i+1}/{num_images}: {filename}")
+
+        return results
+
+    def _generate_filename(
+        self,
+        prompt: str,
+        style: str,
+        mood: str,
+        seed: int,
+        variation: int,
+        has_audio: bool = False
+    ) -> str:
+        """Generate descriptive filename following CompI conventions"""
+
+        # Create prompt slug (first 5 words)
+        prompt_words = prompt.lower().replace(',', '').split()[:5]
+        prompt_slug = "_".join(prompt_words)
+
+        # Create style and mood slugs
+        style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
+        mood_slug = mood.replace(" ", "").replace(",", "")[:10] if mood else "neutral"
+
+        # Timestamp
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # Audio indicator
+        audio_tag = "_AUDIO" if has_audio else ""
+
+        # Combine all elements
+        filename = f"{prompt_slug}_{style_slug}_{mood_slug}_{timestamp}_seed{seed}{audio_tag}_v{variation}"
+
+        return filename
+
+    def batch_process(
+        self,
+        audio_directory: str,
+        text_prompt: str,
+        style: str = "",
+        mood: str = "",
+        **generation_kwargs
+    ) -> List[Dict]:
+        """
+        Process multiple audio files in batch
+
+        Args:
+            audio_directory: Directory containing audio files
+            text_prompt: Base text prompt for all generations
+            style: Art style
+            mood: Mood/atmosphere
+            **generation_kwargs: Additional generation parameters
+
+        Returns:
+            List of all generation results
+        """
+        audio_dir = Path(audio_directory)
+        if not audio_dir.exists():
+            raise ValueError(f"Audio directory not found: {audio_directory}")
+
+        # Find audio files
+        audio_extensions = {'.mp3', '.wav', '.flac', '.m4a', '.ogg'}
+        audio_files = [
+            f for f in audio_dir.iterdir()
+            if f.suffix.lower() in audio_extensions
+        ]
+
+        if not audio_files:
+            raise ValueError(f"No audio files found in {audio_directory}")
+
+        logger.info(f"Processing {len(audio_files)} audio files")
+
+        all_results = []
+        for audio_file in audio_files:
+            logger.info(f"Processing: {audio_file.name}")
+
+            try:
+                results = self.generate_image(
+                    text_prompt=text_prompt,
+                    style=style,
+                    mood=mood,
+                    audio_path=str(audio_file),
+                    **generation_kwargs
+                )
+                all_results.extend(results)
+
+            except Exception as e:
+                logger.error(f"Error processing {audio_file.name}: {e}")
+                continue
+
+        logger.info(f"Batch processing complete: {len(all_results)} images generated")
+        return all_results
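A minimal sketch of driving the class above programmatically; the audio path is a placeholder, and the first call downloads the Stable Diffusion and Whisper weights:

from src.generators.compi_phase2a_audio_to_image import CompIPhase2AAudioToImage

generator = CompIPhase2AAudioToImage(device="auto", whisper_model="base")

# One image conditioned on both the text prompt and an audio file
results = generator.generate_image(
    text_prompt="a misty forest at dawn",
    style="oil painting",
    mood="serene",
    audio_path="inputs/ambient.wav",
    num_images=1,
    seed=42,
)
print(results[0]["image_path"])  # PNG saved alongside a *_metadata.json sidecar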
src/generators/compi_phase2b_data_to_image.py
ADDED
@@ -0,0 +1,432 @@
+"""
+CompI Phase 2.B: Data/Logic Input to Image Generation
+
+This module implements data-driven AI art generation that combines:
+- CSV data analysis and processing
+- Mathematical formula evaluation
+- Data-to-text conversion for prompt enhancement
+- Data visualization for artistic conditioning
+- Intelligent fusion of data insights with creative prompts
+
+Features:
+- Support for CSV files with comprehensive data analysis
+- Safe mathematical formula evaluation with NumPy
+- Poetic text generation from data patterns
+- Data visualization creation for artistic inspiration
+- Comprehensive metadata logging and filename conventions
+- Batch processing capabilities for multiple datasets
+"""
+
+import os
+import sys
+import torch
+import json
+import pandas as pd
+import numpy as np
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple, Union
+from pathlib import Path
+import logging
+
+# Add project root to path
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+
+from diffusers import StableDiffusionPipeline
+from PIL import Image
+
+from src.utils.data_utils import DataProcessor, DataToTextConverter, DataVisualizer, DataFeatures
+from src.utils.logging_utils import setup_logger
+from src.utils.file_utils import ensure_directory_exists, generate_filename
+
+# Setup logging
+logger = setup_logger(__name__)
+
+class CompIPhase2BDataToImage:
+    """
+    CompI Phase 2.B: Data/Logic Input to Image Generation System
+
+    Transforms structured data and mathematical formulas into AI-generated art
+    """
+
+    def __init__(
+        self,
+        model_name: str = "runwayml/stable-diffusion-v1-5",
+        device: str = "auto",
+        output_dir: str = "outputs",
+        visualization_style: str = "artistic"
+    ):
+        """
+        Initialize the data-to-image generation system
+
+        Args:
+            model_name: Stable Diffusion model to use
+            device: Device for inference (auto, cpu, cuda)
+            output_dir: Directory for saving generated images
+            visualization_style: Style for data visualizations
+        """
+        self.model_name = model_name
+        self.device = self._setup_device(device)
+        self.output_dir = Path(output_dir)
+        ensure_directory_exists(self.output_dir)
+
+        # Initialize components
+        self.pipe = None
+        self.data_processor = DataProcessor()
+        self.text_converter = DataToTextConverter()
+        self.visualizer = DataVisualizer(style=visualization_style)
+
+        logger.info(f"Initialized CompI Phase 2.B on {self.device}")
+
+    def _setup_device(self, device: str) -> str:
+        """Setup and validate device"""
+        if device == "auto":
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        if device == "cuda" and not torch.cuda.is_available():
+            logger.warning("CUDA requested but not available, falling back to CPU")
+            device = "cpu"
+
+        return device
+
+    def _load_pipeline(self):
+        """Lazy load the Stable Diffusion pipeline"""
+        if self.pipe is None:
+            logger.info(f"Loading Stable Diffusion model: {self.model_name}")
+
+            # Custom safety checker that allows creative content
+            def dummy_safety_checker(images, **kwargs):
+                return images, [False] * len(images)
+
+            self.pipe = StableDiffusionPipeline.from_pretrained(
+                self.model_name,
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                safety_checker=dummy_safety_checker,
+                requires_safety_checker=False
+            )
+
+            self.pipe = self.pipe.to(self.device)
+            self.pipe.enable_attention_slicing()
+
+            if self.device == "cuda":
+                self.pipe.enable_model_cpu_offload()
+
+            logger.info("Stable Diffusion pipeline loaded successfully")
+
+    def analyze_csv_data(self, csv_path: str) -> Tuple[pd.DataFrame, DataFeatures, str, Image.Image]:
+        """
+        Comprehensive CSV data analysis
+
+        Args:
+            csv_path: Path to CSV file
+
+        Returns:
+            Tuple of (DataFrame, DataFeatures, poetic_description, visualization_image)
+        """
+        logger.info(f"Analyzing CSV data: {csv_path}")
+
+        # Load and analyze data
+        df = pd.read_csv(csv_path)
+        features = self.data_processor.analyze_csv_data(df)
+
+        # Generate poetic description
+        poetic_description = self.text_converter.generate_poetic_description(features)
+
+        # Create visualization
+        visualization_image = self.visualizer.create_data_visualization(df, features)
+
+        return df, features, poetic_description, visualization_image
+
+    def evaluate_mathematical_formula(self, formula: str, num_points: int = 100) -> Tuple[np.ndarray, Dict, str, Image.Image]:
+        """
+        Evaluate mathematical formula and create artistic interpretation
+
+        Args:
+            formula: Mathematical expression
+            num_points: Number of points to generate
+
+        Returns:
+            Tuple of (result_array, metadata, poetic_description, visualization_image)
+        """
+        logger.info(f"Evaluating mathematical formula: {formula}")
+
+        # Evaluate formula
+        result_array, metadata = self.data_processor.evaluate_formula(formula, num_points)
+
+        # Generate poetic description
+        poetic_description = self.text_converter.generate_formula_description(formula, metadata)
+
+        # Create visualization
+        visualization_image = self.visualizer.create_formula_visualization(result_array, formula, metadata)
+
+        return result_array, metadata, poetic_description, visualization_image
+
+    def generate_image(
+        self,
+        text_prompt: str,
+        style: str = "",
+        mood: str = "",
+        csv_path: Optional[str] = None,
+        formula: Optional[str] = None,
+        num_images: int = 1,
+        height: int = 512,
+        width: int = 512,
+        num_inference_steps: int = 30,
+        guidance_scale: float = 7.5,
+        seed: Optional[int] = None,
+        save_data_visualization: bool = True
+    ) -> List[Dict]:
+        """
+        Generate images with data/formula conditioning
+
+        Args:
+            text_prompt: Base text prompt
+            style: Art style
+            mood: Mood/atmosphere
+            csv_path: Optional path to CSV file
+            formula: Optional mathematical formula
+            num_images: Number of images to generate
+            height: Image height
+            width: Image width
+            num_inference_steps: Number of diffusion steps
+            guidance_scale: Guidance scale for generation
+            seed: Random seed for reproducibility
+            save_data_visualization: Whether to save data visualization
+
+        Returns:
+            List of generation results with metadata
+        """
+        self._load_pipeline()
+
+        # Process data input
+        data_features = None
+        poetic_description = ""
+        data_visualization = None
+        data_type = "none"
+
+        if csv_path and os.path.exists(csv_path):
+            df, data_features, poetic_description, data_visualization = self.analyze_csv_data(csv_path)
+            data_type = "csv"
+        elif formula and formula.strip():
+            result_array, formula_metadata, poetic_description, data_visualization = self.evaluate_mathematical_formula(formula)
+            data_type = "formula"
+
+        # Create enhanced prompt
+        enhanced_prompt = text_prompt
+        if style:
+            enhanced_prompt += f", {style}"
+        if mood:
+            enhanced_prompt += f", {mood}"
+        if poetic_description:
+            enhanced_prompt += f", {poetic_description}"
+
+        logger.info(f"Generating {num_images} image(s) with enhanced prompt")
+
+        results = []
+
+        for i in range(num_images):
+            # Set up generation parameters
+            current_seed = seed if seed is not None else torch.seed()
+            generator = torch.Generator(device=self.device).manual_seed(current_seed)
+
+            # Generate image
+            with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
+                result = self.pipe(
+                    enhanced_prompt,
+                    height=height,
+                    width=width,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    generator=generator
+                )
+
+            image = result.images[0]
+
+            # Create metadata
+            metadata = {
+                "timestamp": datetime.now().isoformat(),
+                "text_prompt": text_prompt,
+                "style": style,
+                "mood": mood,
+                "enhanced_prompt": enhanced_prompt,
+                "poetic_description": poetic_description,
+                "data_type": data_type,
+                "csv_path": csv_path,
+                "formula": formula,
+                "generation_params": {
+                    "height": height,
+                    "width": width,
+                    "num_inference_steps": num_inference_steps,
+                    "guidance_scale": guidance_scale,
+                    "seed": current_seed,
+                    "model": self.model_name
+                },
+                "device": self.device,
+                "phase": "2B_data_to_image"
+            }
+
+            # Add data features to metadata
+            if data_features:
+                metadata["data_features"] = data_features.to_dict()
+
+            # Generate filename
+            filename = self._generate_filename(
+                text_prompt, style, mood, current_seed, i + 1,
+                data_type=data_type
+            )
+
+            # Save image and metadata
+            image_path = self.output_dir / f"{filename}.png"
+            metadata_path = self.output_dir / f"{filename}_metadata.json"
+
+            image.save(image_path)
+            with open(metadata_path, 'w') as f:
+                json.dump(metadata, f, indent=2)
+
+            # Save data visualization if requested
+            data_viz_path = None
+            if save_data_visualization and data_visualization:
+                data_viz_path = self.output_dir / f"{filename}_data_viz.png"
+                data_visualization.save(data_viz_path)
+
+            results.append({
+                "image": image,
+                "image_path": str(image_path),
+                "metadata_path": str(metadata_path),
+                "data_visualization_path": str(data_viz_path) if data_viz_path else None,
+                "data_visualization": data_visualization,
+                "metadata": metadata,
+                "filename": filename,
+                "poetic_description": poetic_description
+            })
+
+            logger.info(f"Generated image {i+1}/{num_images}: {filename}")
+
+        return results
+
+    def _generate_filename(
+        self,
+        prompt: str,
+        style: str,
+        mood: str,
+        seed: int,
+        variation: int,
+        data_type: str = "none"
+    ) -> str:
+        """Generate descriptive filename following CompI conventions"""
+
+        # Create prompt slug (first 5 words)
+        prompt_words = prompt.lower().replace(',', '').split()[:5]
+        prompt_slug = "_".join(prompt_words)
+
+        # Create style and mood slugs
+        style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
+        mood_slug = mood.replace(" ", "").replace(",", "")[:10] if mood else "neutral"
+
+        # Timestamp
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # Data type indicator
+        data_tag = f"_{data_type.upper()}" if data_type != "none" else ""
+
+        # Combine all elements
+        filename = f"{prompt_slug}_{style_slug}_{mood_slug}_{timestamp}_seed{seed}{data_tag}_v{variation}"
+
+        return filename
+
+    def batch_process_csv_files(
+        self,
+        csv_directory: str,
+        text_prompt: str,
+        style: str = "",
+        mood: str = "",
+        **generation_kwargs
+    ) -> List[Dict]:
+        """
+        Process multiple CSV files in batch
+
+        Args:
+            csv_directory: Directory containing CSV files
+            text_prompt: Base text prompt for all generations
+            style: Art style
+            mood: Mood/atmosphere
+            **generation_kwargs: Additional generation parameters
+
+        Returns:
+            List of all generation results
+        """
+        csv_dir = Path(csv_directory)
+        if not csv_dir.exists():
+            raise ValueError(f"CSV directory not found: {csv_directory}")
+
+        # Find CSV files
+        csv_files = list(csv_dir.glob("*.csv"))
+
+        if not csv_files:
+            raise ValueError(f"No CSV files found in {csv_directory}")
+
+        logger.info(f"Processing {len(csv_files)} CSV files")
+
+        all_results = []
+        for csv_file in csv_files:
+            logger.info(f"Processing: {csv_file.name}")
+
+            try:
+                results = self.generate_image(
+                    text_prompt=text_prompt,
+                    style=style,
+                    mood=mood,
+                    csv_path=str(csv_file),
+                    **generation_kwargs
+                )
+                all_results.extend(results)
+
+            except Exception as e:
+                logger.error(f"Error processing {csv_file.name}: {e}")
+                continue
+
+        logger.info(f"Batch processing complete: {len(all_results)} images generated")
+        return all_results
+
+    def batch_process_formulas(
+        self,
+        formulas: List[str],
+        text_prompt: str,
+        style: str = "",
+        mood: str = "",
+        **generation_kwargs
+    ) -> List[Dict]:
+        """
+        Process multiple mathematical formulas in batch
+
+        Args:
+            formulas: List of mathematical formulas
+            text_prompt: Base text prompt for all generations
+            style: Art style
+            mood: Mood/atmosphere
+            **generation_kwargs: Additional generation parameters
+
+        Returns:
+            List of all generation results
+        """
+        logger.info(f"Processing {len(formulas)} mathematical formulas")
+
+        all_results = []
+        for i, formula in enumerate(formulas):
+            logger.info(f"Processing formula {i+1}/{len(formulas)}: {formula}")
+
+            try:
+                results = self.generate_image(
+                    text_prompt=text_prompt,
+                    style=style,
+                    mood=mood,
+                    formula=formula,
+                    **generation_kwargs
+                )
+                all_results.extend(results)
+
+            except Exception as e:
+                logger.error(f"Error processing formula '{formula}': {e}")
+                continue
+
+        logger.info(f"Batch processing complete: {len(all_results)} images generated")
+        return all_results
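Note: to make the API above concrete, here is a minimal usage sketch, assuming the repository root is on sys.path and that the formula string is one DataProcessor.evaluate_formula accepts; the prompt, style, and formula values are illustrative placeholders, not part of this diff.

from src.generators.compi_phase2b_data_to_image import CompIPhase2BDataToImage

# Instantiate on whatever device is available; outputs land in ./outputs
generator = CompIPhase2BDataToImage(device="auto", output_dir="outputs")

# Formula-conditioned generation: the expression is evaluated with NumPy,
# summarized into a poetic description, and appended to the enhanced prompt.
results = generator.generate_image(
    text_prompt="a flowing river of light",
    style="digital painting",
    mood="serene",
    formula="np.sin(x) * np.exp(-x / 10)",  # hypothetical formula input
    num_images=1,
    seed=42,
)
print(results[0]["image_path"], results[0]["poetic_description"])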
src/generators/compi_phase2c_emotion_to_image.py
ADDED
@@ -0,0 +1,408 @@
+"""
+CompI Phase 2.C: Emotional/Contextual Input to Image Generation
+
+This module implements emotion-driven AI art generation that combines:
+- Emotion detection and sentiment analysis
+- Contextual mood processing
+- Emoji and text-based emotion recognition
+- Color palette generation based on emotions
+- Intelligent fusion of emotional context with creative prompts
+
+Features:
+- Support for preset emotions, custom emotions, and emoji input
+- Automatic sentiment analysis with TextBlob
+- Emotion-to-color palette mapping
+- Contextual prompt enhancement
+- Comprehensive metadata logging and filename conventions
+- Batch processing capabilities for multiple emotional contexts
+"""
+
+import os
+import sys
+import torch
+import json
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple, Union
+from pathlib import Path
+import logging
+
+# Add project root to path
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+
+from diffusers import StableDiffusionPipeline
+from PIL import Image
+
+from src.utils.emotion_utils import EmotionProcessor, EmotionalPromptEnhancer, EmotionAnalysis, EmotionCategory
+from src.utils.logging_utils import setup_logger
+from src.utils.file_utils import ensure_directory_exists, generate_filename
+
+# Setup logging
+logger = setup_logger(__name__)
+
+class CompIPhase2CEmotionToImage:
+    """
+    CompI Phase 2.C: Emotional/Contextual Input to Image Generation System
+
+    Transforms emotions, moods, and contextual feelings into AI-generated art
+    """
+
+    def __init__(
+        self,
+        model_name: str = "runwayml/stable-diffusion-v1-5",
+        device: str = "auto",
+        output_dir: str = "outputs"
+    ):
+        """
+        Initialize the emotion-to-image generation system
+
+        Args:
+            model_name: Stable Diffusion model to use
+            device: Device for inference (auto, cpu, cuda)
+            output_dir: Directory for saving generated images
+        """
+        self.model_name = model_name
+        self.device = self._setup_device(device)
+        self.output_dir = Path(output_dir)
+        ensure_directory_exists(self.output_dir)
+
+        # Initialize components
+        self.pipe = None
+        self.emotion_processor = EmotionProcessor()
+        self.prompt_enhancer = EmotionalPromptEnhancer()
+
+        logger.info(f"Initialized CompI Phase 2.C on {self.device}")
+
+    def _setup_device(self, device: str) -> str:
+        """Setup and validate device"""
+        if device == "auto":
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        if device == "cuda" and not torch.cuda.is_available():
+            logger.warning("CUDA requested but not available, falling back to CPU")
+            device = "cpu"
+
+        return device
+
+    def _load_pipeline(self):
+        """Lazy load the Stable Diffusion pipeline"""
+        if self.pipe is None:
+            logger.info(f"Loading Stable Diffusion model: {self.model_name}")
+
+            # Custom safety checker that allows creative content
+            def dummy_safety_checker(images, **kwargs):
+                return images, [False] * len(images)
+
+            self.pipe = StableDiffusionPipeline.from_pretrained(
+                self.model_name,
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                safety_checker=dummy_safety_checker,
+                requires_safety_checker=False
+            )
+
+            self.pipe = self.pipe.to(self.device)
+            self.pipe.enable_attention_slicing()
+
+            if self.device == "cuda":
+                self.pipe.enable_model_cpu_offload()
+
+            logger.info("Stable Diffusion pipeline loaded successfully")
+
+    def analyze_emotion(
+        self,
+        emotion_input: str,
+        emotion_type: str = "auto",
+        contextual_text: Optional[str] = None
+    ) -> EmotionAnalysis:
+        """
+        Comprehensive emotion analysis
+
+        Args:
+            emotion_input: Emotion input (preset, custom, emoji, or text)
+            emotion_type: Type of input ('preset', 'custom', 'emoji', 'text', 'auto')
+            contextual_text: Additional contextual text for analysis
+
+        Returns:
+            EmotionAnalysis object with complete analysis
+        """
+        logger.info(f"Analyzing emotion input: {emotion_input}")
+
+        # Combine inputs for analysis
+        analysis_text = emotion_input
+        if contextual_text:
+            analysis_text += f" {contextual_text}"
+
+        # Determine selected emotion for preset types
+        selected_emotion = None
+        if emotion_type == "preset" or (emotion_type == "auto" and emotion_input.lower() in self.emotion_processor.preset_emotions):
+            selected_emotion = emotion_input.lower()
+
+        # Perform emotion analysis
+        emotion_analysis = self.emotion_processor.analyze_emotion(analysis_text, selected_emotion)
+
+        return emotion_analysis
+
+    def generate_image(
+        self,
+        text_prompt: str,
+        style: str = "",
+        emotion_input: str = "",
+        emotion_type: str = "auto",
+        contextual_text: str = "",
+        enhancement_strength: float = 0.7,
+        num_images: int = 1,
+        height: int = 512,
+        width: int = 512,
+        num_inference_steps: int = 30,
+        guidance_scale: float = 7.5,
+        seed: Optional[int] = None
+    ) -> List[Dict]:
+        """
+        Generate images with emotional conditioning
+
+        Args:
+            text_prompt: Base text prompt
+            style: Art style
+            emotion_input: Emotion input (preset, custom, emoji, or descriptive text)
+            emotion_type: Type of emotion input
+            contextual_text: Additional contextual description
+            enhancement_strength: How strongly to apply emotion (0-1)
+            num_images: Number of images to generate
+            height: Image height
+            width: Image width
+            num_inference_steps: Number of diffusion steps
+            guidance_scale: Guidance scale for generation
+            seed: Random seed for reproducibility
+
+        Returns:
+            List of generation results with metadata
+        """
+        self._load_pipeline()
+
+        # Analyze emotion if provided
+        emotion_analysis = None
+        if emotion_input.strip():
+            emotion_analysis = self.analyze_emotion(emotion_input, emotion_type, contextual_text)
+
+        # Create enhanced prompt
+        if emotion_analysis:
+            enhanced_prompt = self.prompt_enhancer.enhance_prompt_with_emotion(
+                text_prompt, style, emotion_analysis, enhancement_strength
+            )
+        else:
+            enhanced_prompt = text_prompt
+            if style:
+                enhanced_prompt += f", {style}"
+
+        logger.info(f"Generating {num_images} image(s) with enhanced prompt")
+
+        results = []
+
+        for i in range(num_images):
+            # Set up generation parameters
+            current_seed = seed if seed is not None else torch.seed()
+            generator = torch.Generator(device=self.device).manual_seed(current_seed)
+
+            # Generate image
+            with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
+                result = self.pipe(
+                    enhanced_prompt,
+                    height=height,
+                    width=width,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    generator=generator
+                )
+
+            image = result.images[0]
+
+            # Create metadata
+            metadata = {
+                "timestamp": datetime.now().isoformat(),
+                "text_prompt": text_prompt,
+                "style": style,
+                "emotion_input": emotion_input,
+                "emotion_type": emotion_type,
+                "contextual_text": contextual_text,
+                "enhancement_strength": enhancement_strength,
+                "enhanced_prompt": enhanced_prompt,
+                "generation_params": {
+                    "height": height,
+                    "width": width,
+                    "num_inference_steps": num_inference_steps,
+                    "guidance_scale": guidance_scale,
+                    "seed": current_seed,
+                    "model": self.model_name
+                },
+                "device": self.device,
+                "phase": "2C_emotion_to_image"
+            }
+
+            # Add emotion analysis to metadata
+            if emotion_analysis:
+                metadata["emotion_analysis"] = emotion_analysis.to_dict()
+                metadata["emotion_tags"] = self.prompt_enhancer.generate_emotion_tags(emotion_analysis)
+
+            # Generate filename
+            filename = self._generate_filename(
+                text_prompt, style, emotion_analysis, current_seed, i + 1
+            )
+
+            # Save image and metadata
+            image_path = self.output_dir / f"{filename}.png"
+            metadata_path = self.output_dir / f"{filename}_metadata.json"
+
+            image.save(image_path)
+            with open(metadata_path, 'w') as f:
+                json.dump(metadata, f, indent=2)
+
+            results.append({
+                "image": image,
+                "image_path": str(image_path),
+                "metadata_path": str(metadata_path),
+                "metadata": metadata,
+                "filename": filename,
+                "emotion_analysis": emotion_analysis
+            })
+
+            logger.info(f"Generated image {i+1}/{num_images}: {filename}")
+
+        return results
+
+    def _generate_filename(
+        self,
+        prompt: str,
+        style: str,
+        emotion_analysis: Optional[EmotionAnalysis],
+        seed: int,
+        variation: int
+    ) -> str:
+        """Generate descriptive filename following CompI conventions"""
+
+        # Create prompt slug (first 5 words)
+        prompt_words = prompt.lower().replace(',', '').split()[:5]
+        prompt_slug = "_".join(prompt_words)
+
+        # Create style slug
+        style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
+
+        # Create emotion slug
+        if emotion_analysis:
+            emotion_slug = f"{emotion_analysis.primary_emotion.value}_{emotion_analysis.intensity_level}"[:15]
+        else:
+            emotion_slug = "neutral"
+
+        # Timestamp
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # Combine all elements
+        filename = f"{prompt_slug}_{style_slug}_{emotion_slug}_{timestamp}_seed{seed}_EMO_v{variation}"
+
+        return filename
+
+    def batch_process_emotions(
+        self,
+        text_prompt: str,
+        style: str,
+        emotions: List[str],
+        emotion_type: str = "auto",
+        **generation_kwargs
+    ) -> List[Dict]:
+        """
+        Process multiple emotions in batch
+
+        Args:
+            text_prompt: Base text prompt for all generations
+            style: Art style
+            emotions: List of emotions to process
+            emotion_type: Type of emotion input
+            **generation_kwargs: Additional generation parameters
+
+        Returns:
+            List of all generation results
+        """
+        logger.info(f"Processing {len(emotions)} emotions in batch")
+
+        all_results = []
+        for i, emotion in enumerate(emotions):
+            logger.info(f"Processing emotion {i+1}/{len(emotions)}: {emotion}")
+
+            try:
+                results = self.generate_image(
+                    text_prompt=text_prompt,
+                    style=style,
+                    emotion_input=emotion,
+                    emotion_type=emotion_type,
+                    **generation_kwargs
+                )
+                all_results.extend(results)
+
+            except Exception as e:
+                logger.error(f"Error processing emotion '{emotion}': {e}")
+                continue
+
+        logger.info(f"Batch processing complete: {len(all_results)} images generated")
+        return all_results
+
+    def generate_emotion_palette_art(
+        self,
+        text_prompt: str,
+        style: str,
+        emotion_input: str,
+        use_color_conditioning: bool = True,
+        **generation_kwargs
+    ) -> List[Dict]:
+        """
+        Generate art using emotion-derived color palettes
+
+        Args:
+            text_prompt: Base text prompt
+            style: Art style
+            emotion_input: Emotion input
+            use_color_conditioning: Whether to add color palette to prompt
+            **generation_kwargs: Additional generation parameters
+
+        Returns:
+            List of generation results with color palette conditioning
+        """
+        # Analyze emotion to get color palette
+        emotion_analysis = self.analyze_emotion(emotion_input)
+
+        # Enhance prompt with color information if requested
+        if use_color_conditioning and emotion_analysis:
+            color_names = self._hex_to_color_names(emotion_analysis.color_palette)
+            color_prompt = f"with a color palette of {', '.join(color_names)}"
+            enhanced_text_prompt = f"{text_prompt}, {color_prompt}"
+        else:
+            enhanced_text_prompt = text_prompt
+
+        return self.generate_image(
+            text_prompt=enhanced_text_prompt,
+            style=style,
+            emotion_input=emotion_input,
+            **generation_kwargs
+        )
+
+    def _hex_to_color_names(self, hex_colors: List[str]) -> List[str]:
+        """Convert hex colors to approximate color names"""
+        color_mapping = {
+            "#FFD700": "golden", "#FFA500": "orange", "#FF69B4": "pink",
+            "#00CED1": "turquoise", "#32CD32": "lime", "#4169E1": "blue",
+            "#6495ED": "cornflower", "#708090": "slate", "#2F4F4F": "dark slate",
+            "#191970": "midnight blue", "#DC143C": "crimson", "#B22222": "firebrick",
+            "#8B0000": "dark red", "#FF4500": "orange red", "#FF6347": "tomato",
+            "#800080": "purple", "#4B0082": "indigo", "#2E2E2E": "dark gray",
+            "#696969": "dim gray", "#A9A9A9": "dark gray", "#FF1493": "deep pink",
+            "#FFB6C1": "light pink", "#FFC0CB": "pink", "#FFFF00": "yellow",
+            "#C71585": "medium violet", "#DB7093": "pale violet", "#20B2AA": "light sea green",
+            "#48D1CC": "medium turquoise", "#40E0D0": "turquoise", "#AFEEEE": "pale turquoise",
+            "#9370DB": "medium purple", "#8A2BE2": "blue violet", "#7B68EE": "medium slate blue",
+            "#6A5ACD": "slate blue", "#483D8B": "dark slate blue", "#808080": "gray",
+            "#C0C0C0": "silver", "#D3D3D3": "light gray", "#DCDCDC": "gainsboro"
+        }
+
+        color_names = []
+        for hex_color in hex_colors:
+            color_name = color_mapping.get(hex_color.upper(), "colorful")
+            color_names.append(color_name)
+
+        return color_names
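Note: a minimal sketch of the batch entry point above, assuming emotion_utils resolves presets, free text, and emoji as the docstrings describe; the emotion list and prompt are illustrative placeholders.

from src.generators.compi_phase2c_emotion_to_image import CompIPhase2CEmotionToImage

gen = CompIPhase2CEmotionToImage(device="auto")

# One generation per emotion; each routes through analyze_emotion() first
results = gen.batch_process_emotions(
    text_prompt="an abandoned lighthouse at dawn",
    style="oil painting",
    emotions=["serene", "melancholy", "🔥"],
    num_images=1,
    seed=7,
)
for r in results:
    # emotion_tags is only present when an emotion analysis was produced
    print(r["filename"], r["metadata"].get("emotion_tags"))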
src/generators/compi_phase2d_realtime_to_image.py
ADDED
@@ -0,0 +1,483 @@
+"""
+CompI Phase 2.D: Real-Time Data Feeds to Image Generation
+
+This module implements real-time data-driven AI art generation that combines:
+- Weather data integration from multiple APIs
+- News headlines and RSS feed processing
+- Financial market data incorporation
+- Real-time context analysis and summarization
+- Intelligent fusion of real-time data with creative prompts
+
+Features:
+- Support for weather, news, and financial data feeds
+- Automatic data caching and rate limiting
+- Context-aware prompt enhancement
+- Temporal and thematic analysis
+- Comprehensive metadata logging and filename conventions
+- Batch processing capabilities for multiple data sources
+"""
+
+import os
+import sys
+import torch
+import json
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple, Union
+from pathlib import Path
+import logging
+
+# Add project root to path
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+
+from diffusers import StableDiffusionPipeline
+from PIL import Image
+
+from src.utils.realtime_data_utils import (
+    RealTimeDataProcessor, RealTimeContext, DataFeedType, RealTimeDataPoint
+)
+from src.utils.logging_utils import setup_logger
+from src.utils.file_utils import ensure_directory_exists, generate_filename
+
+# Setup logging
+logger = setup_logger(__name__)
+
+class CompIPhase2DRealTimeToImage:
+    """
+    CompI Phase 2.D: Real-Time Data Feeds to Image Generation System
+
+    Transforms real-time data feeds into AI-generated art
+    """
+
+    def __init__(
+        self,
+        model_name: str = "runwayml/stable-diffusion-v1-5",
+        device: str = "auto",
+        output_dir: str = "outputs"
+    ):
+        """
+        Initialize the real-time data-to-image generation system
+
+        Args:
+            model_name: Stable Diffusion model to use
+            device: Device for inference (auto, cpu, cuda)
+            output_dir: Directory for saving generated images
+        """
+        self.model_name = model_name
+        self.device = self._setup_device(device)
+        self.output_dir = Path(output_dir)
+        ensure_directory_exists(self.output_dir)
+
+        # Initialize components
+        self.pipe = None
+        self.data_processor = RealTimeDataProcessor()
+
+        logger.info(f"Initialized CompI Phase 2.D on {self.device}")
+
+    def _setup_device(self, device: str) -> str:
+        """Setup and validate device"""
+        if device == "auto":
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+
+        if device == "cuda" and not torch.cuda.is_available():
+            logger.warning("CUDA requested but not available, falling back to CPU")
+            device = "cpu"
+
+        return device
+
+    def _load_pipeline(self):
+        """Lazy load the Stable Diffusion pipeline"""
+        if self.pipe is None:
+            logger.info(f"Loading Stable Diffusion model: {self.model_name}")
+
+            # Custom safety checker that allows creative content
+            def dummy_safety_checker(images, **kwargs):
+                return images, [False] * len(images)
+
+            self.pipe = StableDiffusionPipeline.from_pretrained(
+                self.model_name,
+                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
+                safety_checker=dummy_safety_checker,
+                requires_safety_checker=False
+            )
+
+            self.pipe = self.pipe.to(self.device)
+            self.pipe.enable_attention_slicing()
+
+            if self.device == "cuda":
+                self.pipe.enable_model_cpu_offload()
+
+            logger.info("Stable Diffusion pipeline loaded successfully")
+
+    def fetch_realtime_context(
+        self,
+        include_weather: bool = False,
+        weather_city: str = "New York",
+        weather_api_key: Optional[str] = None,
+        include_news: bool = False,
+        news_category: str = "general",
+        max_news: int = 3,
+        news_api_key: Optional[str] = None,
+        include_financial: bool = False
+    ) -> RealTimeContext:
+        """
+        Fetch real-time context from various data sources
+
+        Args:
+            include_weather: Whether to include weather data
+            weather_city: City for weather data
+            weather_api_key: Optional weather API key
+            include_news: Whether to include news data
+            news_category: Category of news to fetch
+            max_news: Maximum number of news items
+            news_api_key: Optional news API key
+            include_financial: Whether to include financial data
+
+        Returns:
+            RealTimeContext with processed data
+        """
+        logger.info("Fetching real-time context for art generation")
+
+        return self.data_processor.fetch_realtime_context(
+            include_weather=include_weather,
+            weather_city=weather_city,
+            include_news=include_news,
+            news_category=news_category,
+            max_news=max_news,
+            include_financial=include_financial,
+            weather_api_key=weather_api_key,
+            news_api_key=news_api_key
+        )
+
+    def generate_image(
+        self,
+        text_prompt: str,
+        style: str = "",
+        mood: str = "",
+        include_weather: bool = False,
+        weather_city: str = "New York",
+        weather_api_key: Optional[str] = None,
+        include_news: bool = False,
+        news_category: str = "general",
+        max_news: int = 3,
+        news_api_key: Optional[str] = None,
+        include_financial: bool = False,
+        context_strength: float = 0.7,
+        num_images: int = 1,
+        height: int = 512,
+        width: int = 512,
+        num_inference_steps: int = 30,
+        guidance_scale: float = 7.5,
+        seed: Optional[int] = None
+    ) -> List[Dict]:
+        """
+        Generate images with real-time data conditioning
+
+        Args:
+            text_prompt: Base text prompt
+            style: Art style
+            mood: Mood/atmosphere
+            include_weather: Whether to include weather data
+            weather_city: City for weather data
+            weather_api_key: Optional weather API key
+            include_news: Whether to include news data
+            news_category: Category of news to fetch
+            max_news: Maximum number of news items
+            news_api_key: Optional news API key
+            include_financial: Whether to include financial data
+            context_strength: How strongly to apply real-time context (0-1)
+            num_images: Number of images to generate
+            height: Image height
+            width: Image width
+            num_inference_steps: Number of diffusion steps
+            guidance_scale: Guidance scale for generation
+            seed: Random seed for reproducibility
+
+        Returns:
+            List of generation results with metadata
+        """
+        self._load_pipeline()
+
+        # Fetch real-time context if any data sources are enabled
+        realtime_context = None
+        if include_weather or include_news or include_financial:
+            realtime_context = self.fetch_realtime_context(
+                include_weather=include_weather,
+                weather_city=weather_city,
+                weather_api_key=weather_api_key,
+                include_news=include_news,
+                news_category=news_category,
+                max_news=max_news,
+                news_api_key=news_api_key,
+                include_financial=include_financial
+            )
+
+        # Create enhanced prompt
+        enhanced_prompt = self._create_enhanced_prompt(
+            text_prompt, style, mood, realtime_context, context_strength
+        )
+
+        logger.info(f"Generating {num_images} image(s) with real-time context")
+
+        results = []
+
+        for i in range(num_images):
+            # Set up generation parameters
+            current_seed = seed if seed is not None else torch.seed()
+            generator = torch.Generator(device=self.device).manual_seed(current_seed)
+
+            # Generate image
+            with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
+                result = self.pipe(
+                    enhanced_prompt,
+                    height=height,
+                    width=width,
+                    num_inference_steps=num_inference_steps,
+                    guidance_scale=guidance_scale,
+                    generator=generator
+                )
+
+            image = result.images[0]
+
+            # Create metadata
+            metadata = {
+                "timestamp": datetime.now().isoformat(),
+                "text_prompt": text_prompt,
+                "style": style,
+                "mood": mood,
+                "enhanced_prompt": enhanced_prompt,
+                "context_strength": context_strength,
+                "data_sources": {
+                    "weather": include_weather,
+                    "news": include_news,
+                    "financial": include_financial
+                },
+                "generation_params": {
+                    "height": height,
+                    "width": width,
+                    "num_inference_steps": num_inference_steps,
+                    "guidance_scale": guidance_scale,
+                    "seed": current_seed,
+                    "model": self.model_name
+                },
+                "device": self.device,
+                "phase": "2D_realtime_to_image"
+            }
+
+            # Add real-time context to metadata
+            if realtime_context:
+                metadata["realtime_context"] = realtime_context.to_dict()
+
+            # Generate filename
+            filename = self._generate_filename(
+                text_prompt, style, realtime_context, current_seed, i + 1
+            )
+
+            # Save image and metadata
+            image_path = self.output_dir / f"{filename}.png"
+            metadata_path = self.output_dir / f"{filename}_metadata.json"
+
+            image.save(image_path)
+            with open(metadata_path, 'w') as f:
+                json.dump(metadata, f, indent=2)
+
+            results.append({
+                "image": image,
+                "image_path": str(image_path),
+                "metadata_path": str(metadata_path),
+                "metadata": metadata,
+                "filename": filename,
+                "realtime_context": realtime_context
+            })
+
+            logger.info(f"Generated image {i+1}/{num_images}: {filename}")
+
+        return results
+
+    def _create_enhanced_prompt(
+        self,
+        text_prompt: str,
+        style: str,
+        mood: str,
+        realtime_context: Optional[RealTimeContext],
+        context_strength: float
+    ) -> str:
+        """
+        Create enhanced prompt with real-time context
+
+        Args:
+            text_prompt: Base text prompt
+            style: Art style
+            mood: Mood/atmosphere
+            realtime_context: Real-time context data
+            context_strength: How strongly to apply context (0-1)
+
+        Returns:
+            Enhanced prompt with real-time context
+        """
+        enhanced_prompt = text_prompt.strip()
+
+        # Add style
+        if style:
+            enhanced_prompt += f", {style}"
+
+        # Add mood
+        if mood:
+            enhanced_prompt += f", {mood}"
+
+        # Add real-time context based on strength
+        if realtime_context and context_strength > 0:
+            if context_strength > 0.7:
+                # Strong context integration
+                enhanced_prompt += f", {realtime_context.artistic_inspiration}"
+                if realtime_context.mood_indicators:
+                    mood_text = ", ".join(realtime_context.mood_indicators[:2])
+                    enhanced_prompt += f", with {mood_text} influences"
+
+            elif context_strength > 0.4:
+                # Moderate context integration
+                enhanced_prompt += f", {realtime_context.artistic_inspiration}"
+
+            else:
+                # Subtle context integration
+                if realtime_context.key_themes:
+                    theme = realtime_context.key_themes[0]
+                    enhanced_prompt += f", inspired by {theme}"
+
+        return enhanced_prompt
+
+    def _generate_filename(
+        self,
+        prompt: str,
+        style: str,
+        realtime_context: Optional[RealTimeContext],
+        seed: int,
+        variation: int
+    ) -> str:
+        """Generate descriptive filename following CompI conventions"""
+
+        # Create prompt slug (first 5 words)
+        prompt_words = prompt.lower().replace(',', '').split()[:5]
+        prompt_slug = "_".join(prompt_words)
+
+        # Create style slug
+        style_slug = style.replace(" ", "").replace(",", "")[:10] if style else "standard"
+
+        # Create context slug
+        if realtime_context and realtime_context.data_points:
+            context_types = []
+            for dp in realtime_context.data_points:
+                context_types.append(dp.feed_type.value[:3])  # First 3 chars
+            context_slug = "_".join(set(context_types))[:15]
+        else:
+            context_slug = "static"
+
+        # Timestamp
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+        # Combine all elements
+        filename = f"{prompt_slug}_{style_slug}_{context_slug}_{timestamp}_seed{seed}_RTDATA_v{variation}"
+
+        return filename
+
+    def batch_process_data_sources(
+        self,
+        text_prompt: str,
+        style: str,
+        data_source_configs: List[Dict],
+        **generation_kwargs
+    ) -> List[Dict]:
+        """
+        Process multiple data source configurations in batch
+
+        Args:
+            text_prompt: Base text prompt for all generations
+            style: Art style
+            data_source_configs: List of data source configuration dictionaries
+            **generation_kwargs: Additional generation parameters
+
+        Returns:
+            List of all generation results
+        """
+        logger.info(f"Processing {len(data_source_configs)} data source configurations")
+
+        all_results = []
+        for i, config in enumerate(data_source_configs):
+            logger.info(f"Processing configuration {i+1}/{len(data_source_configs)}")
+
+            try:
+                results = self.generate_image(
+                    text_prompt=text_prompt,
+                    style=style,
+                    **config,
+                    **generation_kwargs
+                )
+                all_results.extend(results)
+
+            except Exception as e:
+                logger.error(f"Error processing configuration {i+1}: {e}")
+                continue
+
+        logger.info(f"Batch processing complete: {len(all_results)} images generated")
+        return all_results
+
+    def generate_temporal_series(
+        self,
+        text_prompt: str,
+        style: str,
+        data_config: Dict,
+        time_intervals: List[int],
+        **generation_kwargs
+    ) -> List[Dict]:
+        """
+        Generate a series of images with real-time data at different time intervals
+
+        Args:
+            text_prompt: Base text prompt
+            style: Art style
+            data_config: Data source configuration
+            time_intervals: List of time intervals in minutes between generations
+            **generation_kwargs: Additional generation parameters
+
+        Returns:
+            List of generation results across time
+        """
+        import time
+
+        logger.info(f"Generating temporal series with {len(time_intervals)} intervals")
+
+        all_results = []
+
+        for i, interval in enumerate(time_intervals):
+            if i > 0:  # Don't wait before first generation
+                logger.info(f"Waiting {interval} minutes before next generation...")
+                time.sleep(interval * 60)  # Convert minutes to seconds
+
+            logger.info(f"Generating image {i+1}/{len(time_intervals)}")
+
+            try:
+                # Clear cache to ensure fresh data
+                self.data_processor.cache.cache.clear()
+
+                results = self.generate_image(
+                    text_prompt=text_prompt,
+                    style=style,
+                    **data_config,
+                    **generation_kwargs
+                )
+
+                # Add temporal metadata
+                for result in results:
+                    result["metadata"]["temporal_series"] = {
+                        "series_index": i,
+                        "total_in_series": len(time_intervals),
+                        "interval_minutes": interval if i > 0 else 0
+                    }
+
+                all_results.extend(results)
+
+            except Exception as e:
+                logger.error(f"Error in temporal generation {i+1}: {e}")
+                continue
+
+        logger.info(f"Temporal series complete: {len(all_results)} images generated")
+        return all_results
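Note: a minimal sketch of a weather-and-news-conditioned call, assuming RealTimeDataProcessor can fetch without explicit API keys (its fallback behavior is defined in realtime_data_utils, not shown here); the city, category, and strength values are illustrative placeholders.

from src.generators.compi_phase2d_realtime_to_image import CompIPhase2DRealTimeToImage

gen = CompIPhase2DRealTimeToImage(device="auto")

results = gen.generate_image(
    text_prompt="a city square in motion",
    style="impressionist",
    include_weather=True,
    weather_city="Lisbon",       # placeholder city
    include_news=True,
    news_category="technology",
    context_strength=0.8,        # > 0.7 selects the strong-integration branch
    num_images=1,
)
print(results[0]["metadata"]["data_sources"])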
src/generators/compi_phase2e_refimg_to_image.py
ADDED
@@ -0,0 +1,578 @@
"""
CompI Phase 2.E: Style Reference/Example Image to AI Art Generation

This module implements multimodal AI art generation that combines:
- Text prompts with style and mood conditioning
- Reference image style transfer and guidance
- Image-to-image generation with controllable strength
- Support for both local files and web URLs
- Advanced style analysis and prompt enhancement

Features:
- Support for various image formats and web sources
- Real-time image analysis and style suggestion
- Controllable reference strength for creative flexibility
- Comprehensive metadata logging and filename conventions
- Batch processing capabilities with multiple variations
"""

import os
import sys
import torch
import json
from datetime import datetime
from typing import Dict, List, Optional, Tuple, Union
from pathlib import Path
import logging

# Add project root to path
sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))

from diffusers import StableDiffusionImg2ImgPipeline, StableDiffusionPipeline
from PIL import Image
import numpy as np

from src.utils.image_utils import ImageProcessor, StyleAnalyzer
from src.utils.logging_utils import setup_logger
from src.utils.file_utils import ensure_directory_exists, generate_filename
from src.config import (
    STABLE_DIFFUSION_IMG2IMG_MODEL,
    OUTPUTS_DIR,
    DEFAULT_IMAGE_SIZE,
    DEFAULT_INFERENCE_STEPS,
    DEFAULT_GUIDANCE_SCALE
)

# Setup logging
logger = setup_logger(__name__)

class CompIPhase2ERefImageToImage:
    """
    CompI Phase 2.E: Style Reference/Example Image to AI Art Generation System

    Combines text prompts with reference image style guidance for enhanced creativity
    """

    def __init__(
        self,
        model_name: str = STABLE_DIFFUSION_IMG2IMG_MODEL,
        device: Optional[str] = None,
        enable_attention_slicing: bool = True,
        enable_memory_efficient_attention: bool = True
    ):
        """
        Initialize the CompI Phase 2.E system

        Args:
            model_name: Hugging Face model identifier
            device: Device to run on ('cuda', 'cpu', or None for auto)
            enable_attention_slicing: Enable attention slicing for memory efficiency
            enable_memory_efficient_attention: Enable memory efficient attention
        """
        self.model_name = model_name
        self.device = device or ("cuda" if torch.cuda.is_available() else "cpu")

        # Initialize components
        self.image_processor = ImageProcessor()
        self.style_analyzer = StyleAnalyzer()

        # Initialize pipelines (lazy loading)
        self._img2img_pipeline = None
        self._txt2img_pipeline = None

        # Configuration
        self.enable_attention_slicing = enable_attention_slicing
        self.enable_memory_efficient_attention = enable_memory_efficient_attention

        logger.info(f"Initialized CompI Phase 2.E on device: {self.device}")

    @property
    def img2img_pipeline(self) -> StableDiffusionImg2ImgPipeline:
        """Lazy load img2img pipeline"""
        if self._img2img_pipeline is None:
            logger.info(f"Loading img2img pipeline: {self.model_name}")
            self._img2img_pipeline = StableDiffusionImg2ImgPipeline.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                safety_checker=None,  # Disabled for creative use
                requires_safety_checker=False
            )
            self._img2img_pipeline = self._img2img_pipeline.to(self.device)

            if self.enable_attention_slicing:
                self._img2img_pipeline.enable_attention_slicing()
            if self.enable_memory_efficient_attention and hasattr(self._img2img_pipeline, 'enable_memory_efficient_attention'):
                self._img2img_pipeline.enable_memory_efficient_attention()

        return self._img2img_pipeline

    @property
    def txt2img_pipeline(self) -> StableDiffusionPipeline:
        """Lazy load txt2img pipeline for fallback"""
        if self._txt2img_pipeline is None:
            logger.info(f"Loading txt2img pipeline: {self.model_name}")
            self._txt2img_pipeline = StableDiffusionPipeline.from_pretrained(
                self.model_name,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                safety_checker=None,  # Disabled for creative use
                requires_safety_checker=False
            )
            self._txt2img_pipeline = self._txt2img_pipeline.to(self.device)

            if self.enable_attention_slicing:
                self._txt2img_pipeline.enable_attention_slicing()
            if self.enable_memory_efficient_attention and hasattr(self._txt2img_pipeline, 'enable_memory_efficient_attention'):
                self._txt2img_pipeline.enable_memory_efficient_attention()

        return self._txt2img_pipeline

    def load_reference_image(
        self,
        source: Union[str, Path, Image.Image],
        preprocess: bool = True
    ) -> Optional[Tuple[Image.Image, Dict]]:
        """
        Load and analyze reference image from various sources

        Args:
            source: Image source (file path, URL, or PIL Image)
            preprocess: Whether to preprocess the image

        Returns:
            Tuple of (processed_image, analysis_results) or None if failed
        """
        try:
            # Load image based on source type
            if isinstance(source, Image.Image):
                image = source.convert('RGB')
                source_info = "PIL Image object"
            elif isinstance(source, (str, Path)):
                source_str = str(source)
                if source_str.startswith(('http://', 'https://')):
                    image = self.image_processor.load_image_from_url(source_str)
                    source_info = f"URL: {source_str}"
                else:
                    image = self.image_processor.load_image_from_file(source_str)
                    source_info = f"File: {source_str}"

                if image is None:
                    return None
            else:
                logger.error(f"Unsupported source type: {type(source)}")
                return None

            # Preprocess if requested
            if preprocess:
                image = self.image_processor.preprocess_image(image, DEFAULT_IMAGE_SIZE)

            # Analyze image properties
            properties = self.image_processor.analyze_image_properties(image)
            style_suggestions = self.style_analyzer.suggest_style_keywords(properties)
            image_hash = self.image_processor.generate_image_hash(image)

            analysis = {
                'source': source_info,
                'properties': properties,
                'style_suggestions': style_suggestions,
                'hash': image_hash,
                'processed_size': image.size
            }

            logger.info(f"Successfully loaded and analyzed reference image: {analysis}")
            return image, analysis

        except Exception as e:
            logger.error(f"Error loading reference image: {e}")
            return None

    def enhance_prompt_with_style(
        self,
        base_prompt: str,
        style: str = "",
        mood: str = "",
        style_suggestions: List[str] = None
    ) -> str:
        """
        Enhance prompt with style information from reference image

        Args:
            base_prompt: Base text prompt
            style: Additional style keywords
            mood: Mood/atmosphere keywords
            style_suggestions: Suggested keywords from image analysis

        Returns:
            Enhanced prompt string
        """
        try:
            prompt_parts = [base_prompt.strip()]

            # Add explicit style
            if style.strip():
                prompt_parts.append(style.strip())

            # Add mood
            if mood.strip():
                prompt_parts.append(mood.strip())

            # Add style suggestions from image analysis
            if style_suggestions:
                # Limit to top 3 suggestions to avoid prompt bloat
                top_suggestions = style_suggestions[:3]
                prompt_parts.extend(top_suggestions)

            enhanced_prompt = ", ".join(prompt_parts)
            logger.info(f"Enhanced prompt: {enhanced_prompt}")
            return enhanced_prompt

        except Exception as e:
            logger.error(f"Error enhancing prompt: {e}")
            return base_prompt

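    # Worked example of the fusion rule above (inputs invented for illustration):
    #   enhance_prompt_with_style(
    #       "a lighthouse at dusk", style="oil painting", mood="serene",
    #       style_suggestions=["warm palette", "soft lighting", "high contrast", "grainy"])
    #   returns "a lighthouse at dusk, oil painting, serene, warm palette, soft lighting, high contrast"
    #   (only the first three suggestions survive the [:3] cap against prompt bloat)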
    def generate_with_reference(
        self,
        prompt: str,
        reference_image: Image.Image,
        style: str = "",
        mood: str = "",
        strength: float = 0.5,
        num_images: int = 1,
        num_inference_steps: int = DEFAULT_INFERENCE_STEPS,
        guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
        seed: Optional[int] = None,
        style_suggestions: List[str] = None
    ) -> List[Dict]:
        """
        Generate images using reference image guidance

        Args:
            prompt: Text prompt
            reference_image: Reference PIL Image
            style: Style keywords
            mood: Mood keywords
            strength: Reference strength (0.0-1.0, higher = closer to reference)
            num_images: Number of images to generate
            num_inference_steps: Number of denoising steps
            guidance_scale: Classifier-free guidance scale
            seed: Random seed for reproducibility
            style_suggestions: Style suggestions from image analysis

        Returns:
            List of generation results with metadata
        """
        try:
            # Enhance prompt with style information
            enhanced_prompt = self.enhance_prompt_with_style(
                prompt, style, mood, style_suggestions
            )

            results = []

            for i in range(num_images):
                # Set up random seed
                if seed is not None:
                    current_seed = seed + i
                else:
                    current_seed = torch.seed()

                generator = torch.Generator(device=self.device).manual_seed(current_seed)

                # Generate image
                logger.info(f"Generating image {i+1}/{num_images} with reference guidance")

                with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
                    result = self.img2img_pipeline(
                        prompt=enhanced_prompt,
                        image=reference_image,
                        strength=strength,
                        num_inference_steps=num_inference_steps,
                        guidance_scale=guidance_scale,
                        generator=generator
                    )

                generated_image = result.images[0]

                # Create metadata
                metadata = {
                    'prompt': prompt,
                    'enhanced_prompt': enhanced_prompt,
                    'style': style,
                    'mood': mood,
                    'strength': strength,
                    'num_inference_steps': num_inference_steps,
                    'guidance_scale': guidance_scale,
                    'seed': current_seed,
                    'model': self.model_name,
                    'generation_type': 'img2img_reference',
                    'timestamp': datetime.now().isoformat(),
                    'device': self.device,
                    'reference_size': reference_image.size,
                    'output_size': generated_image.size,
                    'style_suggestions': style_suggestions or []
                }

                results.append({
                    'image': generated_image,
                    'metadata': metadata,
                    'index': i
                })

            logger.info(f"Successfully generated {len(results)} images with reference guidance")
            return results

        except Exception as e:
            logger.error(f"Error generating images with reference: {e}")
            return []

    def generate_without_reference(
        self,
        prompt: str,
        style: str = "",
        mood: str = "",
        num_images: int = 1,
        num_inference_steps: int = DEFAULT_INFERENCE_STEPS,
        guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
        seed: Optional[int] = None
    ) -> List[Dict]:
        """
        Generate images without reference (fallback to text-to-image)

        Args:
            prompt: Text prompt
            style: Style keywords
            mood: Mood keywords
            num_images: Number of images to generate
            num_inference_steps: Number of denoising steps
            guidance_scale: Classifier-free guidance scale
            seed: Random seed for reproducibility

        Returns:
            List of generation results with metadata
        """
        try:
            # Enhance prompt
            enhanced_prompt = self.enhance_prompt_with_style(prompt, style, mood)

            results = []

            for i in range(num_images):
                # Set up random seed
                if seed is not None:
                    current_seed = seed + i
                else:
                    current_seed = torch.seed()

                generator = torch.Generator(device=self.device).manual_seed(current_seed)

                # Generate image
                logger.info(f"Generating image {i+1}/{num_images} without reference")

                with torch.autocast(self.device) if self.device == "cuda" else torch.no_grad():
                    result = self.txt2img_pipeline(
                        prompt=enhanced_prompt,
                        height=DEFAULT_IMAGE_SIZE[1],
                        width=DEFAULT_IMAGE_SIZE[0],
                        num_inference_steps=num_inference_steps,
                        guidance_scale=guidance_scale,
                        generator=generator
                    )

                generated_image = result.images[0]

                # Create metadata
                metadata = {
                    'prompt': prompt,
                    'enhanced_prompt': enhanced_prompt,
                    'style': style,
                    'mood': mood,
                    'num_inference_steps': num_inference_steps,
                    'guidance_scale': guidance_scale,
                    'seed': current_seed,
                    'model': self.model_name,
                    'generation_type': 'txt2img_fallback',
                    'timestamp': datetime.now().isoformat(),
                    'device': self.device,
                    'output_size': generated_image.size
                }

                results.append({
                    'image': generated_image,
                    'metadata': metadata,
                    'index': i
                })

            logger.info(f"Successfully generated {len(results)} images without reference")
            return results

        except Exception as e:
            logger.error(f"Error generating images without reference: {e}")
            return []

    def save_results(
        self,
        results: List[Dict],
        output_dir: Path = OUTPUTS_DIR,
        reference_info: Optional[Dict] = None
    ) -> List[str]:
        """
        Save generation results with comprehensive metadata

        Args:
            results: List of generation results
            output_dir: Output directory
            reference_info: Reference image information

        Returns:
            List of saved file paths
        """
        try:
            ensure_directory_exists(output_dir)
            saved_files = []

            for result in results:
                image = result['image']
                metadata = result['metadata']
                index = result['index']

                # Generate filename
                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
                prompt_slug = "_".join(metadata['prompt'].lower().split()[:5])
                style_slug = metadata.get('style', '').replace(' ', '')[:10]
                mood_slug = metadata.get('mood', '').replace(' ', '')[:10]

                # Add reference indicator
                ref_indicator = "REFIMG" if metadata['generation_type'] == 'img2img_reference' else "NOREFIMG"

                filename = f"{prompt_slug}_{style_slug}_{mood_slug}_{timestamp}_seed{metadata['seed']}_{ref_indicator}_v{index+1}.png"
                filepath = output_dir / filename

                # Save image
                image.save(filepath)

                # Add reference info to metadata if available
                if reference_info:
                    metadata['reference_info'] = reference_info

                # Save metadata
                metadata_filename = filepath.stem + "_metadata.json"
                metadata_filepath = output_dir / metadata_filename

                with open(metadata_filepath, 'w') as f:
                    json.dump(metadata, f, indent=2, default=str)

                saved_files.extend([str(filepath), str(metadata_filepath)])
                logger.info(f"Saved: {filepath}")

            return saved_files

        except Exception as e:
            logger.error(f"Error saving results: {e}")
            return []

    def generate_batch(
        self,
        prompt: str,
        reference_source: Optional[Union[str, Path, Image.Image]] = None,
        style: str = "",
        mood: str = "",
        strength: float = 0.5,
        num_images: int = 1,
        num_inference_steps: int = DEFAULT_INFERENCE_STEPS,
        guidance_scale: float = DEFAULT_GUIDANCE_SCALE,
        seed: Optional[int] = None,
        save_results: bool = True,
        output_dir: Path = OUTPUTS_DIR
    ) -> Dict:
        """
        Complete batch generation pipeline

        Args:
            prompt: Text prompt
            reference_source: Reference image source (file, URL, or PIL Image)
            style: Style keywords
            mood: Mood keywords
            strength: Reference strength (only used if reference provided)
            num_images: Number of images to generate
            num_inference_steps: Number of denoising steps
            guidance_scale: Classifier-free guidance scale
            seed: Random seed for reproducibility
            save_results: Whether to save results to disk
            output_dir: Output directory for saved files

        Returns:
            Dictionary with results and metadata
        """
        try:
            logger.info(f"Starting batch generation: {num_images} images")

            reference_image = None
            reference_info = None
            style_suggestions = []

            # Load and analyze reference image if provided
            if reference_source is not None:
                ref_result = self.load_reference_image(reference_source)
                if ref_result:
                    reference_image, reference_info = ref_result
                    style_suggestions = reference_info.get('style_suggestions', [])
                    logger.info(f"Using reference image with suggestions: {style_suggestions}")
                else:
                    logger.warning("Failed to load reference image, falling back to text-only generation")

            # Generate images
            if reference_image is not None:
                results = self.generate_with_reference(
                    prompt=prompt,
                    reference_image=reference_image,
                    style=style,
                    mood=mood,
                    strength=strength,
                    num_images=num_images,
                    num_inference_steps=num_inference_steps,
                    guidance_scale=guidance_scale,
                    seed=seed,
                    style_suggestions=style_suggestions
                )
            else:
                results = self.generate_without_reference(
                    prompt=prompt,
                    style=style,
                    mood=mood,
                    num_images=num_images,
                    num_inference_steps=num_inference_steps,
                    guidance_scale=guidance_scale,
                    seed=seed
                )

            # Save results if requested
            saved_files = []
            if save_results and results:
                saved_files = self.save_results(results, output_dir, reference_info)

            # Compile final results
            batch_result = {
                'results': results,
                'reference_info': reference_info,
                'saved_files': saved_files,
                'generation_summary': {
                    'total_images': len(results),
                    'prompt': prompt,
                    'style': style,
                    'mood': mood,
                    'has_reference': reference_image is not None,
                    'style_suggestions': style_suggestions,
                    'timestamp': datetime.now().isoformat()
                }
            }

            logger.info(f"Batch generation complete: {len(results)} images generated")
            return batch_result

        except Exception as e:
            logger.error(f"Error in batch generation: {e}")
            return {
                'results': [],
                'reference_info': None,
                'saved_files': [],
                'error': str(e)
            }
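End to end, `generate_batch` is the single entry point of this module. A minimal usage sketch (the prompt, URL, and parameter values are illustrative; the class, argument names, and result keys all come from the file above):

    from src.generators.compi_phase2e_refimg_to_image import CompIPhase2ERefImageToImage

    gen = CompIPhase2ERefImageToImage()  # pipelines load lazily on first generation
    batch = gen.generate_batch(
        prompt="a lighthouse at dusk",
        reference_source="https://example.com/reference.jpg",  # file path or PIL Image also accepted
        style="oil painting",
        mood="serene",
        strength=0.5,   # 0.0-1.0; higher keeps the output closer to the reference
        num_images=2,
        seed=42,        # variations use seed, seed+1
    )
    print(batch["generation_summary"])
    print(batch["saved_files"])  # image + *_metadata.json pairs under OUTPUTS_DIR

If the reference fails to load, the same call falls back to plain text-to-image, which is visible afterwards in `generation_summary["has_reference"]` and in the NOREFIMG filename suffix.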
src/setup_env.py
ADDED
@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
Environment setup script for CompI project.
Run this script to check and install dependencies.
"""

import subprocess
import sys
import os
from pathlib import Path

def run_command(command, description):
    """Run a shell command and handle errors."""
    print(f"\n🔄 {description}...")
    try:
        result = subprocess.run(command, shell=True, check=True,
                                capture_output=True, text=True)
        print(f"✅ {description} completed successfully")
        return True
    except subprocess.CalledProcessError as e:
        print(f"❌ {description} failed:")
        print(f"Error: {e.stderr}")
        return False

def check_python_version():
    """Check if Python version is compatible."""
    version = sys.version_info
    if version.major == 3 and version.minor >= 8:
        print(f"✅ Python {version.major}.{version.minor}.{version.micro} is compatible")
        return True
    else:
        print(f"❌ Python {version.major}.{version.minor}.{version.micro} is not compatible")
        print("Please use Python 3.8 or higher")
        return False

def check_gpu():
    """Check for CUDA availability."""
    try:
        import torch
        if torch.cuda.is_available():
            gpu_count = torch.cuda.device_count()
            gpu_name = torch.cuda.get_device_name(0)
            print(f"✅ CUDA available with {gpu_count} GPU(s): {gpu_name}")
            return True
        else:
            print("⚠️ CUDA not available, will use CPU")
            return False
    except ImportError:
        print("⚠️ PyTorch not installed yet, GPU check will be done after installation")
        return False

def install_requirements():
    """Install requirements from requirements.txt."""
    if not Path("requirements.txt").exists():
        print("❌ requirements.txt not found")
        return False

    return run_command(
        f"{sys.executable} -m pip install -r requirements.txt",
        "Installing requirements"
    )

def download_nltk_data():
    """Download required NLTK data."""
    try:
        import nltk
        print("\n🔄 Downloading NLTK data...")
        nltk.download('punkt', quiet=True)
        nltk.download('vader_lexicon', quiet=True)
        nltk.download('stopwords', quiet=True)
        print("✅ NLTK data downloaded")
        return True
    except ImportError:
        print("⚠️ NLTK not installed, skipping data download")
        return False

def setup_textblob():
    """Setup TextBlob corpora."""
    try:
        import textblob
        print("\n🔄 Setting up TextBlob...")
        run_command(f"{sys.executable} -m textblob.download_corpora",
                    "Downloading TextBlob corpora")
        return True
    except ImportError:
        print("⚠️ TextBlob not installed, skipping setup")
        return False

def main():
    """Main setup function."""
    print("🚀 Setting up CompI Development Environment")
    print("=" * 50)

    # Check Python version
    if not check_python_version():
        sys.exit(1)

    # Install requirements
    if not install_requirements():
        print("❌ Failed to install requirements")
        sys.exit(1)

    # Check GPU after PyTorch installation
    check_gpu()

    # Setup additional components
    download_nltk_data()
    setup_textblob()

    print("\n" + "=" * 50)
    print("🎉 Environment setup completed!")
    print("\nNext steps:")
    print("1. Run: python src/test_setup.py")
    print("2. Start experimenting with notebooks/")
    print("3. Check out the README.md for usage examples")

if __name__ == "__main__":
    main()
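The script is a one-shot bootstrap, run from the repository root:

    python src/setup_env.py

It validates the interpreter version first, installs requirements.txt, then re-checks CUDA and fetches the NLTK and TextBlob corpora used elsewhere in the project for sentiment analysis.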
src/ui/__init__.py
ADDED
@@ -0,0 +1,14 @@
"""
CompI User Interface Components
Interactive web interfaces for the CompI platform.
"""

__all__ = [
    "compi_phase1c_streamlit_ui",
    "compi_phase1c_gradio_ui",
    "compi_phase2a_streamlit_ui",
    "compi_phase2b_streamlit_ui",
    "compi_phase2c_streamlit_ui",
    "compi_phase2d_streamlit_ui",
    "compi_phase2e_streamlit_ui"
]
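The names in `__all__` mirror module files under `src/ui/`; each is a standalone app rather than an importable component. Assuming the usual launch pattern for Streamlit scripts (the individual UI files are not part of this diff), a phase UI would be started with something like:

    streamlit run src/ui/compi_phase2a_streamlit_ui.py

with the Gradio variant (`compi_phase1c_gradio_ui`) presumably run directly as a Python script.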
src/ui/compi_phase3_final_dashboard.py
ADDED
@@ -0,0 +1,1709 @@
#!/usr/bin/env python3
"""
CompI Phase 3 Final Dashboard - Complete Integration (3.A → 3.E)

This is the ultimate CompI interface that integrates ALL Phase 3 components:
- Phase 3.A/3.B: True multimodal fusion with real processing
- Phase 3.C: Advanced references with role assignment and live ControlNet previews
- Phase 3.D: Professional workflow management (gallery, presets, export)
- Phase 3.E: Performance management and model switching

Features:
- All multimodal inputs (Text, Audio, Data, Emotion, Real-time, Multi-Reference)
- Advanced References: multi-image upload/URLs, style vs structure roles, ControlNet with live previews
- Model & Performance: SD 1.5/SDXL switching, LoRA integration, VRAM monitoring, OOM auto-retry
- Workflow & Export: gallery, filters, rating/tags/notes, presets save/load, portable export ZIP
- True fusion engine: real processing for all inputs, intelligent generation mode selection
"""

import os
import io
import csv
import json
import zipfile
import shutil
import platform
import requests
from datetime import datetime
from pathlib import Path
from typing import Optional, Dict, List

import numpy as np
import pandas as pd
import streamlit as st
from PIL import Image
import torch

# --- Diffusers base (txt2img, img2img) ---
from diffusers import (
    StableDiffusionPipeline,
    StableDiffusionImg2ImgPipeline,
)

# --- ControlNet (optional, with graceful fallback) ---
HAS_CONTROLNET = True
CN_IMG2IMG_AVAILABLE = True
try:
    from diffusers import (
        StableDiffusionControlNetPipeline,
        StableDiffusionControlNetImg2ImgPipeline,
        ControlNetModel,
    )
except Exception:
    HAS_CONTROLNET = False
    CN_IMG2IMG_AVAILABLE = False

# --- SDXL & Upscaler (optional) ---
HAS_SDXL = True
HAS_UPSCALER = True
try:
    from diffusers import StableDiffusionXLPipeline
except Exception:
    HAS_SDXL = False

try:
    from diffusers import StableDiffusionLatentUpscalePipeline
except Exception:
    HAS_UPSCALER = False

# --- Audio, Emotion, Real-time, Plots, Previews ---
def _lazy_install(pkgs: str):
    """Install packages on demand"""
    os.system(f"pip install -q {pkgs}")

try:
    import librosa
    import soundfile as sf
except Exception:
    _lazy_install("librosa soundfile")
    import librosa
    import soundfile as sf

try:
    import whisper
except Exception:
    _lazy_install("git+https://github.com/openai/whisper.git")
    import whisper

try:
    from textblob import TextBlob
except Exception:
    _lazy_install("textblob")
    from textblob import TextBlob

try:
    import feedparser
except Exception:
    _lazy_install("feedparser")
    import feedparser

try:
    import matplotlib.pyplot as plt
except Exception:
    _lazy_install("matplotlib")
    import matplotlib.pyplot as plt

try:
    import cv2
except Exception:
    _lazy_install("opencv-python-headless")
    import cv2

# ==================== CONSTANTS & PATHS ====================

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Directory structure
OUTPUT_DIR = Path("outputs")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

EXPORTS_DIR = Path("exports")
EXPORTS_DIR.mkdir(parents=True, exist_ok=True)

PRESETS_DIR = Path("presets")
PRESETS_DIR.mkdir(parents=True, exist_ok=True)

# Log files for different phases
RUNLOG = OUTPUT_DIR / "phase3_run_log.csv"          # fusion logs (3.B)
RUNLOG_3C = OUTPUT_DIR / "phase3c_runs.csv"         # advanced ref logs (3.C)
RUNLOG_3E = OUTPUT_DIR / "phase3e_runlog.csv"       # perf/model logs (3.E)
ANNOT_CSV = OUTPUT_DIR / "phase3d_annotations.csv"  # annotations (3.D)

# ==================== UTILITY FUNCTIONS ====================

def slugify(s: str, n=30):
    """Create safe filename from string"""
    if not s:
        return "none"
    return "_".join(s.lower().split())[:n]

def save_image(img: Image.Image, name: str) -> str:
    """Save image to outputs directory"""
    p = OUTPUT_DIR / name
    img.save(p)
    return str(p)

def vram_gb() -> Optional[float]:
    """Get total VRAM in GB"""
    if DEVICE == "cuda":
        try:
            return torch.cuda.get_device_properties(0).total_memory / (1024**3)
        except Exception:
            return None
    return None

def vram_used_gb() -> Optional[float]:
    """Get used VRAM in GB"""
    if DEVICE == "cuda":
        try:
            torch.cuda.synchronize()
            return torch.cuda.memory_allocated() / (1024**3)
        except Exception:
            return None
    return None

def attempt_enable_xformers(pipe):
    """Try to enable xFormers memory efficient attention"""
    try:
        pipe.enable_xformers_memory_efficient_attention()
        return True
    except Exception:
        return False

def apply_perf(pipe, attn_slice=True, vae_slice=True, vae_tile=False):
    """Apply performance optimizations to pipeline"""
    if attn_slice:
        pipe.enable_attention_slicing()
    if vae_slice:
        try:
            pipe.enable_vae_slicing()
        except Exception:
            pass
    if vae_tile:
        try:
            pipe.enable_vae_tiling()
        except Exception:
            pass

def safe_retry_sizes(h, w, steps):
    """Generate progressive fallback sizes for OOM recovery"""
    sizes = [
        (h, w, steps),
        (max(384, h//2), max(384, w//2), max(steps-8, 12)),
        (384, 384, max(steps-12, 12)),
        (256, 256, max(steps-16, 10)),
    ]
    seen = set()
    for it in sizes:
        if it not in seen:
            seen.add(it)
            yield it

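# Illustrative retry loop for safe_retry_sizes (a sketch only; `pipe` and
# `prompt` stand in for whatever pipeline and prompt the caller holds):
#
#     image = None
#     for h, w, s in safe_retry_sizes(768, 768, 30):
#         try:
#             image = pipe(prompt, height=h, width=w, num_inference_steps=s).images[0]
#             break
#         except torch.cuda.OutOfMemoryError:
#             torch.cuda.empty_cache()  # free VRAM before retrying a smaller size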
def canny_map(img: Image.Image) -> Image.Image:
    """Create Canny edge map from image"""
    arr = np.array(img.convert("RGB"))
    edges = cv2.Canny(arr, 100, 200)
    edges_rgb = cv2.cvtColor(edges, cv2.COLOR_GRAY2RGB)
    return Image.fromarray(edges_rgb)

def depth_proxy(img: Image.Image) -> Image.Image:
    """Create depth-like proxy using grayscale"""
    gray = img.convert("L")
    return Image.merge("RGB", (gray, gray, gray))

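# These two helpers back the "live ControlNet previews" from the module
# docstring: the preview shown to the user is simply the conditioning image.
# Sketch (the reference image variable is illustrative):
#
#     preview = canny_map(ref_img) if cn_type == "Canny" else depth_proxy(ref_img)
#     st.image(preview, caption=f"{cn_type} conditioning preview")
#
# Note that depth_proxy is a cheap grayscale stand-in rather than a learned
# monocular depth estimate, so structural guidance from it is approximate.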
def save_plot(fig) -> Image.Image:
    """Save matplotlib figure as PIL Image"""
    buf = io.BytesIO()
    fig.savefig(buf, format="png", bbox_inches="tight")
    plt.close(fig)
    buf.seek(0)
    return Image.open(buf).convert("RGB")

def env_snapshot() -> Dict:
    """Create environment snapshot for reproducibility"""
    import sys
    try:
        import importlib.metadata as im
    except Exception:
        import importlib_metadata as im

    pkgs = {}
    for pkg in ["torch", "diffusers", "transformers", "accelerate", "opencv-python-headless",
                "librosa", "whisper", "textblob", "pandas", "numpy", "matplotlib",
                "feedparser", "streamlit", "Pillow"]:
        try:
            pkgs[pkg] = im.version(pkg)
        except Exception:
            pass

    return {
        "timestamp": datetime.now().isoformat(),
        "python_version": sys.version,
        "platform": platform.platform(),
        "packages": pkgs
    }

def mk_readme(bundle_meta: Dict, df_meta: pd.DataFrame) -> str:
    """Generate README for export bundle"""
    L = []
    L.append(f"# CompI Export — {bundle_meta['bundle_name']}\n")
    L.append(f"_Created: {bundle_meta['created_at']}_\n")
    L += [
        "## What's inside",
        "- Selected images",
        "- `manifest.json` (environment + settings)",
        "- `metadata.csv` (merged logs)",
        "- `annotations.csv` (ratings/tags/notes)",
    ]
    if bundle_meta.get("preset"):
        L.append("- `preset.json` (saved generation settings)")

    L.append("\n## Summary of selected runs")
    if not df_meta.empty and "mode" in df_meta.columns:
        counts = df_meta["mode"].value_counts().to_dict()
        L.append("Modes:")
        for k, v in counts.items():
            L.append(f"- {k}: {v}")

    L.append("\n## Reproducing")
    L.append("1. Install versions in `manifest.json`.")
    L.append("2. Use `preset.json` or copy prompt/params from `metadata.csv`.")
    L.append("3. Run the dashboard with these settings.")

    return "\n".join(L)

# ==================== CACHED MODEL LOADERS ====================

@st.cache_resource(show_spinner=True)
def load_sd15(txt2img=True):
    """Load Stable Diffusion 1.5 pipeline"""
    if txt2img:
        pipe = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            safety_checker=None,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        )
    else:
        pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            safety_checker=None,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        )
    return pipe.to(DEVICE)

@st.cache_resource(show_spinner=True)
def load_sdxl():
    """Load SDXL pipeline"""
    if not HAS_SDXL:
        return None
    pipe = StableDiffusionXLPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-base-1.0",
        safety_checker=None,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )
    return pipe.to(DEVICE)

@st.cache_resource(show_spinner=True)
def load_upscaler():
    """Load latent upscaler pipeline"""
    if not HAS_UPSCALER:
        return None
    up = StableDiffusionLatentUpscalePipeline.from_pretrained(
        "stabilityai/sd-x2-latent-upscaler",
        safety_checker=None,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    )
    return up.to(DEVICE)

@st.cache_resource(show_spinner=True)
def load_controlnet(cn_type: str):
    """Load ControlNet pipeline"""
    if not HAS_CONTROLNET:
        return None
    cn_id = "lllyasviel/sd-controlnet-canny" if cn_type == "Canny" else "lllyasviel/sd-controlnet-depth"
    controlnet = ControlNetModel.from_pretrained(
        cn_id, torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
    )
    pipe = StableDiffusionControlNetPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        controlnet=controlnet,
        safety_checker=None,
        torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    ).to(DEVICE)
    try:
        pipe.enable_xformers_memory_efficient_attention()
    except Exception:
        pass
    pipe.enable_attention_slicing()
    return pipe

@st.cache_resource(show_spinner=True)
def load_controlnet_img2img(cn_type: str):
    """Load ControlNet + Img2Img hybrid pipeline"""
    global CN_IMG2IMG_AVAILABLE
    if not HAS_CONTROLNET:
        return None
    try:
        cn_id = "lllyasviel/sd-controlnet-canny" if cn_type == "Canny" else "lllyasviel/sd-controlnet-depth"
        controlnet = ControlNetModel.from_pretrained(
            cn_id, torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32
        )
        pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5",
            controlnet=controlnet,
            safety_checker=None,
            torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
        ).to(DEVICE)
        try:
            pipe.enable_xformers_memory_efficient_attention()
        except Exception:
            pass
        pipe.enable_attention_slicing()
        return pipe
    except Exception:
        CN_IMG2IMG_AVAILABLE = False
        return None

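# The @st.cache_resource decorators above are what make SD 1.5 / SDXL switching
# cheap: each pipeline is loaded once per process and re-served from cache on
# every Streamlit rerun. Dispatch sketch (model_choice is assumed to come from
# the "Model & Performance" tab below):
#
#     if model_choice == "SDXL" and HAS_SDXL:
#         pipe = load_sdxl()
#     else:
#         pipe = load_sd15(txt2img=True)
#     apply_perf(pipe, attn_slice=True, vae_slice=True)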
| 367 |
+
# ==================== STREAMLIT LAYOUT ====================
|
| 368 |
+
|
| 369 |
+
st.set_page_config(page_title="CompI — Phase 3 Final Dashboard", layout="wide")
|
| 370 |
+
st.title("🧪 CompI — Final Integrated Dashboard (3.A → 3.E)")
|
| 371 |
+
|
| 372 |
+
# ---- Minimal, clean UI styling ----
|
| 373 |
+
|
| 374 |
+
def inject_minimal_css():
|
| 375 |
+
st.markdown(
|
| 376 |
+
"""
|
| 377 |
+
<style>
|
| 378 |
+
.block-container {padding-top: 1.2rem; padding-bottom: 2rem; max-width: 1200px;}
|
| 379 |
+
.stTabs [role="tablist"] {gap: 6px;}
|
| 380 |
+
.stTabs [role="tab"] {padding: 6px 10px; border-radius: 8px; background: rgba(255,255,255,0.02); border: 1px solid rgba(255,255,255,0.08);}
|
| 381 |
+
.stTabs [aria-selected="true"] {background: rgba(255,255,255,0.04); border-color: rgba(255,255,255,0.16);}
|
| 382 |
+
h1, h2, h3 {margin-bottom: .3rem;}
|
| 383 |
+
.section {padding: 14px 16px; border: 1px solid rgba(255,255,255,0.08); border-radius: 12px; background: rgba(255,255,255,0.02); margin-bottom: 14px;}
|
| 384 |
+
.muted {color: rgba(255,255,255,0.6); text-transform: uppercase; letter-spacing: .08em; font-size: .75rem; margin-bottom: .25rem;}
|
| 385 |
+
.stButton>button {border-radius: 10px; height: 44px;}
|
| 386 |
+
.stButton>button[kind="primary"] {background: #2563eb; border-color: #2563eb;}
|
| 387 |
+
.stTextInput input, .stTextArea textarea {border-radius: 10px;}
|
| 388 |
+
.stMultiSelect [data-baseweb="tag"] {border-radius: 8px;}
|
| 389 |
+
pre, code {border-radius: 10px;}
|
| 390 |
+
#MainMenu, footer {visibility: hidden;}
|
| 391 |
+
</style>
|
| 392 |
+
""",
|
| 393 |
+
unsafe_allow_html=True,
|
| 394 |
+
)
|
| 395 |
+
|
| 396 |
+
# Apply minimal styling early
|
| 397 |
+
inject_minimal_css()
|
| 398 |
+
|
| 399 |
+
# Top metrics (Phase 3.E VRAM monitoring)
|
| 400 |
+
colA, colB, colC, colD = st.columns(4)
|
| 401 |
+
with colA:
|
| 402 |
+
st.metric("Device", DEVICE)
|
| 403 |
+
with colB:
|
| 404 |
+
st.metric("VRAM (GB)", f"{vram_gb():.2f}" if vram_gb() else "N/A")
|
| 405 |
+
with colC:
|
| 406 |
+
st.metric("Used VRAM (GB)", f"{vram_used_gb():.2f}" if vram_used_gb() else "N/A")
|
| 407 |
+
with colD:
|
| 408 |
+
st.caption(f"PyTorch {torch.__version__} • diffusers ready")
|
| 409 |
+
|
| 410 |
+
# Handle deferred clear request BEFORE creating any widgets
if st.session_state.get("clear_inputs", False):
    # Pop ALL relevant input/widget keys so widgets re-initialize to defaults
    keys_to_clear = [
        # Text inputs
        "main_prompt_input", "style_input", "mood_input", "neg_prompt_input", "style_ms", "mood_ms",
        # Optional text areas
        "emo_free_textarea", "ref_urls_textarea",
        # Uploaders & inputs
        "audio_file_uploader", "data_file_uploader", "formula_input", "ref_images_uploader",
        # Toggles / checkboxes / selects / sliders (with explicit keys)
        "enable_emo_checkbox", "enable_rt_checkbox", "enable_ref_checkbox",
        "model_choice_selectbox", "gen_mode_selectbox",
        "use_lora_checkbox", "lora_path_input", "lora_scale_slider",
        "width_input", "height_input", "steps_input", "guidance_input",
        "batch_input", "seed_input", "upsample_checkbox",
        "use_xformers_checkbox", "attn_slice_checkbox", "vae_slice_checkbox", "vae_tile_checkbox",
        "oom_retry_checkbox",
        # Real-time extras
        "city_input", "headlines_slider",
    ]
    for k in keys_to_clear:
        st.session_state.pop(k, None)

    # Clear outputs/state
    st.session_state["generated_images"] = []
    st.session_state["generation_results"] = []

    # Unset the flag; this run already renders the cleared widgets
    st.session_state["clear_inputs"] = False

# Main tabs - Complete Phase 3 integration
# Moved generation below Inputs per UX request; removed separate Generate tab
tab_inputs, tab_refs, tab_model, tab_gallery, tab_presets, tab_export = st.tabs([
    "🧩 Inputs (Text/Audio/Data/Emotion/Real‑time)",
    "🖼️ Advanced References",
    "⚙️ Model & Performance",
    "🖼️ Gallery & Annotate",
    "💾 Presets",
    "📦 Export"
])

# ==================== INPUTS TAB (Phase 3.A/3.B) ====================

with tab_inputs:
    st.markdown("<div class='section'>", unsafe_allow_html=True)
    st.subheader("🧩 Multimodal Inputs")

    # Text & Style (always enabled)
    st.markdown("<div class='muted'>Text & Style</div>", unsafe_allow_html=True)
    main_prompt = st.text_input(
        "Main prompt",
        value=st.session_state.get("main_prompt_input", ""),
        placeholder="A serene cyberpunk alley at dawn",
        key="main_prompt_input",
    )

    # Style and Mood as multi-select dropdowns
    STYLE_OPTIONS = [
        "digital painting", "watercolor", "oil painting", "pixel art", "anime",
        "3D render", "photorealistic", "line art", "low poly", "cyberpunk",
        "isometric", "concept art", "cel shading", "comic book", "impressionist"
    ]
    MOOD_OPTIONS = [
        "dreamy", "luminous", "dark and moody", "whimsical", "serene",
        "epic", "melancholic", "vibrant", "mysterious", "dystopian",
        "hopeful", "playful", "contemplative", "energetic", "ethereal"
    ]

    style_selected = st.multiselect(
        "Style (choose one or more)",
        options=STYLE_OPTIONS,
        default=st.session_state.get("style_ms", []),
        key="style_ms",
        help="Pick one or more styles to condition the artwork"
    )
    mood_selected = st.multiselect(
        "Mood (choose one or more)",
        options=MOOD_OPTIONS,
        default=st.session_state.get("mood_ms", []),
        key="mood_ms",
        help="Pick one or more moods to influence the atmosphere"
    )

    # Join lists into strings for downstream prompt fusion
    style = ", ".join(style_selected)
    mood = ", ".join(mood_selected)

    neg_prompt = st.text_input(
        "Negative prompt (optional)",
        value=st.session_state.get("neg_prompt_input", ""),
        placeholder="e.g., low quality, bad anatomy",
        key="neg_prompt_input",
    )

    st.markdown("</div>", unsafe_allow_html=True)

    # Four columns for aligned sections
    col1, col2, col3, col4 = st.columns(4)

    # AUDIO PROCESSING (Phase 2.A)
    with col1:
        st.markdown("### 🎵 Audio Analysis")
        enable_audio = st.checkbox("Enable Audio Processing", value=False)
        audio_caption = ""
        audio_tags = []
        tempo = None

        if enable_audio:
            audio_file = st.file_uploader("Upload audio (.wav/.mp3)", type=["wav", "mp3"], key="audio_file_uploader")
            if audio_file:
                # Save temporary audio file
                audio_path = OUTPUT_DIR / "tmp_audio.wav"
                with open(audio_path, "wb") as f:
                    f.write(audio_file.read())

                # Load and analyze audio
                y, sr = librosa.load(audio_path.as_posix(), sr=16000)
                dur = librosa.get_duration(y=y, sr=sr)
                st.caption(f"Duration: {dur:.1f}s")

                # Extract tempo
                try:
                    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
                except Exception:
                    tempo = None

                # Extract audio features
                rms = float(np.sqrt(np.mean(y**2)))
                zcr = float(np.mean(librosa.feature.zero_crossing_rate(y)))

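                # NOTE: the tempo/RMS/ZCR cutoffs below are rough empirical
                # heuristics for tagging, not calibrated thresholds.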
                # Generate audio tags based on features
                if tempo:
                    if tempo < 90:
                        audio_tags.append("slow tempo")
                    elif tempo > 140:
                        audio_tags.append("fast tempo")

                if rms > 0.04:
                    audio_tags.append("energetic")
                if zcr > 0.12:
                    audio_tags.append("percussive")

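                # Whisper's pad_or_trim fixes the input to a 30-second window,
                # so only the first ~30s of the clip are transcribed here.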
                # Whisper transcription
                st.info("Transcribing audio (Whisper base)…")
                w = whisper.load_model("base", device=DEVICE)
                wav = whisper.load_audio(audio_path.as_posix())
                wav = whisper.pad_or_trim(wav)
                mel = whisper.log_mel_spectrogram(wav).to(DEVICE)
                dec = whisper.DecodingOptions(language="en", fp16=(DEVICE == "cuda"))
                res = whisper.decode(w, mel, dec)
                audio_caption = res.text.strip()

                st.success(f"Caption: '{audio_caption}'")
                if audio_tags:
                    st.write("Audio tags:", ", ".join(audio_tags))

    # DATA PROCESSING (Phase 2.B)
    with col2:
        st.markdown("### 📊 Data Analysis")
        enable_data = st.checkbox("Enable Data Processing", value=False)
        data_summary = ""
        data_plot = None

        if enable_data:
            data_file = st.file_uploader("Upload CSV", type=["csv"], key="data_file_uploader")
            formula = st.text_input("Or numpy formula", placeholder="np.sin(np.linspace(0, 20, 200))", key="formula_input")

            if data_file is not None:
                df = pd.read_csv(data_file)
                st.dataframe(df.head(), use_container_width=True)

                # Analyze numeric columns
                num = df.select_dtypes(include=np.number)
                if not num.empty:
                    means, mins, maxs, stds = num.mean(), num.min(), num.max(), num.std()
                    data_summary = f"{len(num)} rows x {num.shape[1]} cols; " + " ".join([
                        f"{c}: avg {means[c]:.2f}, min {mins[c]:.2f}, max {maxs[c]:.2f}."
                        for c in num.columns[:3]
                    ])
                    data_summary += " Variability " + ("high." if stds.mean() > 1 else "gentle.")

                    # Create visualization
                    fig = plt.figure(figsize=(6, 3))
                    if num.shape[1] == 1:
                        plt.plot(num.iloc[:, 0])
                        plt.title(f"Pattern: {num.columns[0]}")
                    else:
                        plt.plot(num.iloc[:, 0], label=num.columns[0])
                        plt.plot(num.iloc[:, 1], label=num.columns[1])
                        plt.legend()
                        plt.title("Data Patterns")
                    plt.tight_layout()
                    data_plot = save_plot(fig)
                    st.image(data_plot, caption="Data pattern")

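            # The eval below strips __builtins__ and exposes only numpy; this
            # limits the expression surface but is not a hardened sandbox, so
            # treat formula input as trusted.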
            elif formula.strip():
                try:
                    arr = eval(formula, {"np": np, "__builtins__": {}})
                    arr = np.array(arr)
                    data_summary = f"Mathematical pattern with {arr.size} points."

                    fig = plt.figure(figsize=(6, 3))
                    plt.plot(arr)
                    plt.title("Formula Pattern")
                    plt.tight_layout()
                    data_plot = save_plot(fig)
                    st.image(data_plot, caption="Formula pattern")
                except Exception as e:
                    st.error(f"Formula error: {e}")

    # EMOTION (Phase 2.C)
    with col3:
        st.markdown("### 💭 Emotion Analysis")
        enable_emo = st.checkbox("Enable Emotion Processing", value=False, key="enable_emo_checkbox")
        emo_free = st.text_area(
            "Describe a feeling/context",
            value=st.session_state.get("emo_free_textarea", ""),
            key="emo_free_textarea",
        ) if enable_emo else ""
        emo_label = ""

        if enable_emo and emo_free.strip():
            tb = TextBlob(emo_free)
            pol = tb.sentiment.polarity
            emo_label = "positive, uplifting" if pol > 0.3 else (
                "sad, melancholic" if pol < -0.3 else "neutral, contemplative"
            )
            st.info(f"Sentiment: {emo_label} (polarity {pol:.2f})")

    # REAL-TIME (Phase 2.D)
    with col4:
        st.markdown("### 🌎 Real-time Data")
        enable_rt = st.checkbox("Enable Real-time Feeds", value=False, key="enable_rt_checkbox")
        rt_context = ""

        if enable_rt:
            city = st.text_input("City (weather)", "Toronto", key="city_input")
            headlines_num = st.slider("Headlines", 1, 5, 3, key="headlines_slider")

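            # Both feeds degrade gracefully: any network/API failure is folded
            # into the prompt as an "unavailable" note. The OpenWeather key is
            # read from Streamlit secrets when present, with a hardcoded
            # fallback key baked in below.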
            def get_weather(city):
                try:
                    key = st.secrets.get("OPENWEATHER_KEY", None) if hasattr(st, "secrets") else None
                    url = "https://api.openweathermap.org/data/2.5/weather"
                    params = {
                        "q": city,
                        "units": "metric",
                        "appid": key or "9a524f695a4940f392150142250107"
                    }
                    r = requests.get(url, params=params, timeout=6).json()
                    return f"{r['weather'][0]['description']}, {r['main']['temp']:.1f}°C"
                except Exception as e:
                    return f"unavailable ({e})"

            def get_news(n):
                try:
                    feed = feedparser.parse("https://feeds.bbci.co.uk/news/rss.xml")
                    return "; ".join([e["title"] for e in feed.entries[:n]])
                except Exception as e:
                    return f"unavailable ({e})"

            w = get_weather(city)
            n = get_news(headlines_num)
            st.caption(f"Weather: {w}")
            st.caption(f"News: {n}")
            rt_context = f"Current weather in {city}: {w}. Today's news: {n}."

# ==================== ADVANCED REFERENCES TAB (Phase 3.C) ====================

with tab_refs:
    st.subheader("🖼️ Advanced Multi‑Reference + ControlNet")
    enable_ref = st.checkbox("Enable Multi-Reference Processing", value=False, key="enable_ref_checkbox")
    ref_images: List[Image.Image] = []
    style_idxs = []
    cn_images = []
    img2img_strength = 0.55
    cn_type = "Canny"
    cn_scale = 1.0

    if enable_ref:
        # Multi-reference upload (files + URLs)
        colU, colURL = st.columns(2)

        with colU:
            st.markdown("**📁 Upload Images**")
            uploads = st.file_uploader(
                "Upload reference images",
                type=["png", "jpg", "jpeg"],
                accept_multiple_files=True,
                key="ref_images_uploader"
            )
            if uploads:
                for u in uploads:
                    try:
                        im = Image.open(u).convert("RGB")
                        ref_images.append(im)
                    except Exception as e:
                        st.warning(f"Upload failed: {e}")

        with colURL:
            st.markdown("**🔗 Image URLs**")
            block = st.text_area(
                "Paste image URLs (one per line)",
                value=st.session_state.get("ref_urls_textarea", ""),
                key="ref_urls_textarea",
            )
            if block.strip():
                for line in block.splitlines():
                    url = line.strip()
                    if not url:
                        continue
                    try:
                        r = requests.get(url, timeout=8)
                        if r.status_code == 200:
                            im = Image.open(io.BytesIO(r.content)).convert("RGB")
                            ref_images.append(im)
                    except Exception as e:
                        st.warning(f"URL failed: {e}")

        if ref_images:
            # Display reference images
            st.image(
                ref_images,
                width=180,
                caption=[f"Ref {i+1}" for i in range(len(ref_images))]
            )

            # Role-based assignment (Phase 3.C key feature)
            st.markdown("### 🎨 Reference Role Assignment")
            style_idxs = st.multiselect(
                "Use as **Style References (img2img)**",
                list(range(1, len(ref_images)+1)),
                default=list(range(1, len(ref_images)+1)),
                help="These images will influence the artistic style and mood"
            )

            # ControlNet structure conditioning
            use_cn = st.checkbox("Use **ControlNet** for structure", value=HAS_CONTROLNET)
            if use_cn and not HAS_CONTROLNET:
                st.warning("ControlNet not available in this environment.")
                use_cn = False

            if use_cn:
                cn_type = st.selectbox("ControlNet type", ["Canny", "Depth"], index=0)
                pick = st.selectbox(
                    "Pick **one** structural reference",
                    list(range(1, len(ref_images)+1)),
                    index=0,
                    help="This image will control the composition and structure"
                )

                # Live ControlNet preview (Phase 3.C key feature)
                base = ref_images[int(pick)-1].resize((512, 512))
                cn_map = canny_map(base) if cn_type == "Canny" else depth_proxy(base)

                st.markdown("**🔍 Live ControlNet Preview**")
                st.image(
                    [base, cn_map],
                    width=240,
                    caption=["Selected Reference", f"{cn_type} Map"]
                )
                cn_images = [cn_map]
                cn_scale = st.slider("ControlNet conditioning scale", 0.1, 2.0, 1.0, 0.05)

            # Style strength control
            img2img_strength = st.slider(
                "img2img strength (style adherence)",
                0.2, 0.85, 0.55, 0.05,
                help="Higher values follow style references more closely"
            )

# ==================== MODEL & PERFORMANCE TAB (Phase 3.E) ====================

with tab_model:
    st.subheader("⚙️ Model & Performance Management")
    st.caption("Choose a base model, optional style add‑ons (LoRA), and tune speed/quality settings.")

    # Presets and Glossary helpers
    @st.dialog("Glossary: Common terms")
    def show_glossary():
        st.markdown(
            """
            - Base model: The foundation that generates images (SD 1.5 = fast, SDXL = higher detail).
            - Generation mode:
              - txt2img: Create from your text prompt only.
              - img2img: Start from an input image and transform it using your text.
            - LoRA: A small add‑on that injects a trained style or subject. Use a .safetensors/.pt file.
            - Width/Height: Image size in pixels. Bigger = more detail but slower and more VRAM.
            - Steps: How long the model refines the image. More steps usually means cleaner details.
            - Guidance: How strongly to follow your text. 6–9 is a good range; too high can look unnatural.
            - Batch size: How many images at once. Higher uses more VRAM.
            - Seed: Randomness control. Reuse the same non‑zero seed to reproduce a result.
            - Upscale ×2: Quickly doubles resolution after generation.
            - xFormers attention: GPU speed‑up if supported.
            - Attention/VAE slicing: Reduce VRAM usage (slightly slower). Keep on for stability.
            - VAE tiling: For very large images; decodes in tiles.
            - Auto‑retry on CUDA OOM: If VRAM runs out, try again with safer settings.
            """
        )
        st.button("Close", use_container_width=True)

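    # apply_preset seeds the widget-backed session_state keys and then calls
    # st.rerun(); on the next run each widget picks up its seeded value.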
    def apply_preset(name: str):
        ss = st.session_state
        def s(k, v):
            ss[k] = v
        if name == "fast":
            s("model_choice_selectbox", "SD 1.5 (v1-5)")
            s("gen_mode_selectbox", "txt2img")
            s("width_input", 512); s("height_input", 512)
            s("steps_input", 30); s("guidance_input", 7.5)
            s("batch_input", 1); s("seed_input", 0)
            s("upsample_checkbox", False)
            s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
            s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
            s("oom_retry_checkbox", True)
        elif name == "high":
            model = "SDXL Base 1.0" if HAS_SDXL else "SD 1.5 (v1-5)"
            s("model_choice_selectbox", model)
            s("gen_mode_selectbox", "txt2img")
            s("width_input", 768); s("height_input", 768)
            s("steps_input", 40); s("guidance_input", 7.0)
            s("batch_input", 1); s("seed_input", 0)
            s("upsample_checkbox", True)
            s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
            s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
            s("oom_retry_checkbox", True)
        elif name == "low_vram":
            s("model_choice_selectbox", "SD 1.5 (v1-5)")
            s("gen_mode_selectbox", "txt2img")
            s("width_input", 448); s("height_input", 448)
            s("steps_input", 25); s("guidance_input", 7.5)
            s("batch_input", 1); s("seed_input", 0)
            s("upsample_checkbox", False)
            s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
            s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
            s("oom_retry_checkbox", True)
        elif name == "portrait":
            s("gen_mode_selectbox", "txt2img")
            s("width_input", 512); s("height_input", 768)
            s("steps_input", 30); s("guidance_input", 7.5)
            s("batch_input", 1)
        elif name == "landscape":
            s("gen_mode_selectbox", "txt2img")
            s("width_input", 768); s("height_input", 512)
            s("steps_input", 30); s("guidance_input", 7.5)
            s("batch_input", 1)
        elif name == "instagram":
            s("gen_mode_selectbox", "txt2img")
            s("width_input", 1024); s("height_input", 1024)
            s("steps_input", 35); s("guidance_input", 7.0)
            s("batch_input", 1); s("upsample_checkbox", False)
        elif name == "defaults":
            s("model_choice_selectbox", "SD 1.5 (v1-5)")
            s("gen_mode_selectbox", "txt2img")
            s("width_input", 512); s("height_input", 512)
            s("steps_input", 30); s("guidance_input", 7.5)
            s("batch_input", 1); s("seed_input", 0)
            s("upsample_checkbox", False)
            s("use_xformers_checkbox", True); s("attn_slice_checkbox", True)
            s("vae_slice_checkbox", True); s("vae_tile_checkbox", False)
            s("oom_retry_checkbox", True)
        st.rerun()

    colA, colB, colC, colD = st.columns(4)
    with colA:
        if st.button("⚡ Fast Start"):
            apply_preset("fast")
    with colB:
        if st.button("🔍 High Detail"):
            apply_preset("high")
    with colC:
        if st.button("💻 Low VRAM"):
            apply_preset("low_vram")
    with colD:
        if st.button("❓ Glossary"):
            show_glossary()

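    # The risk score below is a rough heuristic: it scales pixel count, steps,
    # and batch against a 512×512 / 30-step SD 1.5 baseline, and treats SDXL
    # as roughly twice as heavy. Tune the cutoffs to your own GPU.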
    # Simple VRAM safety indicator (placed after preset buttons for visibility)
    def estimate_pixels(w, h):
        return int(w) * int(h)

    def vram_risk_level(w, h, steps, batch, model_name):
        px = estimate_pixels(w, h)
        multiplier = 1.0 if "1.5" in model_name else 2.0  # SDXL ~2x heavier
        load = (px / (512*512)) * (steps / 30.0) * max(1, batch) * multiplier
        if load < 1.2:
            return "✅ Likely safe"
        elif load < 2.2:
            return "⚠️ May be heavy — consider smaller size or steps"
        else:
            return "🟥 High risk of OOM — reduce size/batch/steps"

    risk_msg = vram_risk_level(
        st.session_state.get("width_input", 512),
        st.session_state.get("height_input", 512),
        st.session_state.get("steps_input", 30),
        st.session_state.get("batch_input", 1),
        st.session_state.get("model_choice_selectbox", "SD 1.5 (v1-5)")
    )
    st.info(f"VRAM safety: {risk_msg}")

    # Additional simple layout for more presets and reset
    colP0, colP1a, colP2a, colP3a, colP4a = st.columns(5)
    with colP0:
        if st.button("🧼 Reset to defaults"):
            apply_preset("defaults")
    with colP1a:
        if st.button("🧍 Portrait"):
            apply_preset("portrait")
    with colP2a:
        if st.button("🏞️ Landscape"):
            apply_preset("landscape")
    with colP3a:
        if st.button("📸 Instagram Post"):
            apply_preset("instagram")
    with colP4a:
        st.write("")

    # Model selection
    st.markdown("### 🤖 Model Selection")
    model_choice = st.selectbox(
        "Base model",
        ["SD 1.5 (v1-5)"] + (["SDXL Base 1.0"] if HAS_SDXL else []),
        index=0,
        help="Choose SD 1.5 for speed/low VRAM. Choose SDXL for higher detail (needs more VRAM/CPU).",
        key="model_choice_selectbox"
    )
    gen_mode = st.selectbox(
        "Generation mode",
        ["txt2img", "img2img"],
        index=0,
        help="txt2img: make an image from your text. img2img: start from a reference image and transform it.",
        key="gen_mode_selectbox"
    )

    # LoRA integration
    st.markdown("### 🎭 LoRA Integration")
    use_lora = st.checkbox("Attach LoRA", value=False, help="LoRA = small add-on that injects a learned style or subject into the base model.", key="use_lora_checkbox")
    lora_path = st.text_input("LoRA path", "", help="Path to the .safetensors/.pt LoRA file.", key="lora_path_input") if use_lora else ""
    lora_scale = st.slider("LoRA scale", 0.1, 1.5, 0.8, 0.05, help="How strongly to apply the LoRA. Start at 0.7–0.9.", key="lora_scale_slider") if use_lora else 0.0

    # Generation parameters
    st.markdown("### 🎛️ Generation Parameters")
    colP1, colP2, colP3, colP4 = st.columns(4)
    with colP1:
        width = st.number_input("Width", 256, 1536, 512, 64, help="Image width in pixels. Larger = more detail but slower and more VRAM.", key="width_input")
    with colP2:
        height = st.number_input("Height", 256, 1536, 512, 64, help="Image height in pixels. Common pairs: 512x512 (square), 768x512 (wide).", key="height_input")
    with colP3:
        steps = st.number_input("Steps", 10, 100, 30, 1, help="How long to refine the image. More steps = better quality but slower.", key="steps_input")
    with colP4:
        guidance = st.number_input("Guidance", 1.0, 20.0, 7.5, 0.5, help="How strongly to follow your text prompt. 6–9 is a good range.", key="guidance_input")

    colP5, colP6, colP7 = st.columns(3)
    with colP5:
        batch = st.number_input("Batch size", 1, 6, 1, 1, help="How many images to generate at once. Higher uses more VRAM.", key="batch_input")
    with colP6:
        seed = st.number_input("Seed (0=random)", 0, 2**31-1, 0, 1, help="Use the same seed to reproduce a result. 0 picks a random seed.", key="seed_input")
    with colP7:
        upsample_x2 = st.checkbox("Upscale ×2 (latent upscaler)", value=False, help="Quickly doubles the resolution after generation.", key="upsample_checkbox")

    # Performance optimizations
    st.markdown("### ⚡ Performance & Reliability")
    st.caption("These options help run on limited VRAM and reduce crashes. If you are new, keep the defaults on.")
    colT1, colT2, colT3, colT4 = st.columns(4)
    with colT1:
        use_xformers = st.checkbox("xFormers attention", value=True, help="Speeds up attention on GPUs that support it.", key="use_xformers_checkbox")
    with colT2:
        attn_slice = st.checkbox("Attention slicing", value=True, help="Reduces VRAM usage, slightly slower.", key="attn_slice_checkbox")
    with colT3:
        vae_slice = st.checkbox("VAE slicing", value=True, help="Lower VRAM for the decoder, usually safe to keep on.", key="vae_slice_checkbox")
    with colT4:
        vae_tile = st.checkbox("VAE tiling", value=False, help="For very large images. Uses tiles to decode.", key="vae_tile_checkbox")

    oom_retry = st.checkbox("Auto‑retry on CUDA OOM", value=True, help="If out‑of‑memory happens, try again with safer settings.", key="oom_retry_checkbox")

    with st.expander("New to this? Quick tips"):
        st.markdown(
            "- For fast, reliable results: SD 1.5, 512×512, Steps 25–35, Guidance 7.5, Batch 1.\n"
            "- Higher detail: try SDXL (needs more VRAM), Steps 30–50, bigger size like 768×768.\n"
            "- Seed: 0 = random. Reuse a non‑zero seed to recreate a result.\n"
            "- Out‑of‑memory? Lower width/height, set Batch = 1, keep slicing options on.\n"
            "- LoRA: paste path to a .safetensors/.pt file. Start scale at 0.7–0.9.\n"
            "- Modes: txt2img = from text; img2img = transform an existing image.\n"
            "- Upscale ×2: quickly increases resolution after generation."
        )

# ==================== GENERATION SECTION BELOW INPUTS (Phase 3.B + 3.C + 3.E) ====================

with tab_inputs:
    st.markdown("<div class='section'>", unsafe_allow_html=True)
    st.subheader("🎛️ Fusion & Generation")

    # Build final prompt from real processed inputs (Phase 3.B True Fusion)
    parts = [p for p in [main_prompt, style, mood] if p and p.strip()]

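    # Each modality contributes to the fused prompt only when it is enabled
    # AND actually produced output; the `in locals()` guards protect against
    # branches above that never defined the variable.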
    # Audio fusion - REAL processing
    if 'audio_caption' in locals() and enable_audio and audio_caption:
        parts.append(f"(sound of: {audio_caption})")
    if 'tempo' in locals() and enable_audio and tempo:
        tempo_desc = "slow tempo" if tempo < 90 else ("fast tempo" if tempo > 140 else "")
        if tempo_desc:
            parts.append(tempo_desc)
    if 'audio_tags' in locals() and enable_audio and audio_tags:
        parts.extend(audio_tags)

    # Data fusion - REAL processing
    if 'data_summary' in locals() and enable_data and data_summary:
        parts.append(f"reflecting data patterns: {data_summary}")

    # Emotion fusion - REAL processing
    if 'emo_label' in locals() and enable_emo and emo_label:
        parts.append(f"with a {emo_label} atmosphere")
    elif enable_emo and emo_free.strip():
        parts.append(f"evoking the feeling: {emo_free.strip()}")

    # Real-time fusion - REAL processing
    if 'rt_context' in locals() and enable_rt and rt_context:
        parts.append(rt_context)

    # Build final fused prompt
    final_prompt = ", ".join([p for p in parts if p])
    st.markdown("</div>", unsafe_allow_html=True)

    st.markdown("### 🔮 Fused Prompt Preview")
    st.code(final_prompt, language="text")

    # Initialize image for img2img
    init_image = None
    if gen_mode == "img2img" and enable_ref and style_idxs:
        # Use first chosen style reference as init image
        init_image = ref_images[style_idxs[0]-1].resize((int(width), int(height)))

    # Generation + Clear buttons side-by-side
    col_gen, col_clear = st.columns([3, 1])
    with col_gen:
        go = st.button("🚀 Generate Multimodal Art", type="primary", use_container_width=True)
    with col_clear:
        clear = st.button("🧹 Clear", use_container_width=True)

    # Clear logic: reset prompt fields and any generated output state
    if 'generated_images' not in st.session_state:
        st.session_state.generated_images = []
    if 'generation_results' not in st.session_state:
        st.session_state.generation_results = []

    if clear:
        # Defer clearing input widgets by setting a flag, then rerun
        st.session_state["clear_inputs"] = True
        st.success("Cleared current prompt and output. Ready for a new prompt.")
        st.rerun()

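    # st.cache_resource keeps one pipeline instance alive across reruns and
    # sessions, so each model is loaded from disk only once per process.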
    # Cached pipeline getters
    @st.cache_resource(show_spinner=True)
    def get_txt2img():
        return load_sd15(txt2img=True)

    @st.cache_resource(show_spinner=True)
    def get_img2img():
        return load_sd15(txt2img=False)

    @st.cache_resource(show_spinner=True)
    def get_sdxl():
        return load_sdxl()

    @st.cache_resource(show_spinner=True)
    def get_upscaler():
        return load_upscaler()

    @st.cache_resource(show_spinner=True)
    def get_cn(cn_type: str):
        return load_controlnet(cn_type)

    @st.cache_resource(show_spinner=True)
    def get_cn_i2i(cn_type: str):
        return load_controlnet_img2img(cn_type)

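    # LoRA attachment strategy: recent diffusers versions can fuse LoRA
    # weights into the base model (fuse_lora); where fusing is unsupported,
    # the code falls back to set_adapters, and failing that runs with the
    # adapter's default scaling.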
    def apply_lora(pipe, lora_path, lora_scale):
        """Apply LoRA to pipeline"""
        if not lora_path:
            return "No LoRA"
        try:
            pipe.load_lora_weights(lora_path)
            try:
                pipe.fuse_lora(lora_scale=lora_scale)
            except Exception:
                try:
                    pipe.set_adapters(["default"], adapter_weights=[lora_scale])
                except Exception:
                    pass
            return f"LoRA loaded: {os.path.basename(lora_path)} (scale {lora_scale})"
        except Exception as e:
            return f"LoRA failed: {e}"

    def upsample_if_any(img: Image.Image):
        """Apply upscaling if enabled"""
        if not upsample_x2 or not HAS_UPSCALER:
            return img, False, "none"
        try:
            up = get_upscaler()
            with (torch.autocast(DEVICE) if DEVICE == "cuda" else torch.no_grad()):
                out = up(prompt="sharp, detailed, high quality", image=img)
            return out.images[0], True, "latent_x2"
        except Exception as e:
            return img, False, f"fail:{e}"

    def log_rows(rows, log_path):
        """Log generation results"""
        exists = Path(log_path).exists()
        # Union header across Phase 3 logs
        header = [
            "filepath", "prompt", "neg_prompt", "steps", "guidance", "mode", "seed",
            "width", "height", "model", "img2img_strength", "cn_type", "cn_scale",
            "upscaled", "timestamp"
        ]
        with open(log_path, "a", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            if not exists:
                w.writerow(header)
            for r in rows:
                w.writerow([r.get(k, "") for k in header])

    # GENERATION EXECUTION
    if go:
        images, paths = [], []

        # Choose pipeline based on model selection
        if model_choice.startswith("SDXL") and HAS_SDXL and gen_mode == "txt2img":
            pipe = get_sdxl()
            model_id = "SDXL-Base-1.0"
        else:
            if gen_mode == "txt2img":
                pipe = get_txt2img()
                model_id = "SD-1.5"
            else:
                pipe = get_img2img()
                model_id = "SD-1.5 (img2img)"

        # Apply performance optimizations
        xformed = attempt_enable_xformers(pipe) if use_xformers else False
        apply_perf(pipe, attn_slice, vae_slice, vae_tile)

        # Apply LoRA if specified
        lora_msg = ""
        if use_lora:
            lora_msg = apply_lora(pipe, lora_path, lora_scale)
            if lora_msg:
                st.caption(lora_msg)

        # Determine generation mode based on available inputs (Phase 3.C intelligence)
        have_style = bool(style_idxs)
        have_cn = enable_ref and bool(cn_images)

        # MODE PRIORITY: CN+I2I > CN only > I2I only > T2I
        mode = "T2I"
        if have_cn and have_style and HAS_CONTROLNET:
            mode = "CN+I2I"
        elif have_cn and HAS_CONTROLNET:
            mode = "CN"
        elif have_style:
            mode = "I2I"

        st.info(f"Mode: **{mode}** • Model: **{model_id}** • xFormers: `{xformed}`")

        rows = []
        attempt_list = list(safe_retry_sizes(height, width, steps)) if oom_retry else [(height, width, steps)]

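        # OOM retry ladder: each batch item walks attempt_list (as produced by
        # safe_retry_sizes, assumed to yield progressively smaller
        # height/width/steps combinations) until one attempt succeeds.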
        # Generate batch
        for b in range(int(batch)):
            ok = False
            last_err = None

            for (h_try, w_try, s_try) in attempt_list:
                try:
                    # Seed management: 0 requests a fresh random seed; otherwise
                    # offset by batch index so items are distinct but reproducible
                    seed_eff = torch.seed() if seed == 0 else seed + b
                    gen = torch.manual_seed(seed_eff) if DEVICE == "cpu" else torch.Generator(DEVICE).manual_seed(seed_eff)

                    with (torch.autocast(DEVICE) if DEVICE == "cuda" else torch.no_grad()):
                        if mode == "CN+I2I":
                            # Hybrid ControlNet + Img2Img (Phase 3.C advanced mode)
                            if CN_IMG2IMG_AVAILABLE:
                                cn_pipe = get_cn_i2i(cn_type)
                                init_ref = ref_images[style_idxs[min(b, len(style_idxs)-1)]-1].resize((w_try, h_try))
                                out = cn_pipe(
                                    prompt=final_prompt,
                                    image=init_ref,
                                    control_image=[im for im in cn_images],
                                    controlnet_conditioning_scale=cn_scale,
                                    strength=img2img_strength,
                                    num_inference_steps=s_try,
                                    guidance_scale=guidance,
                                    negative_prompt=neg_prompt if neg_prompt.strip() else None,
                                    generator=gen,
                                )
                                img = out.images[0]
                            else:
                                # Fallback two-pass approach: a ControlNet pass for
                                # structure, then img2img over a 50/50 blend of the
                                # style reference and the structural result
                                cn_pipe = get_cn(cn_type)
                                cn_out = cn_pipe(
                                    prompt=final_prompt,
                                    image=[im for im in cn_images],
                                    controlnet_conditioning_scale=cn_scale,
                                    num_inference_steps=max(s_try//2, 12),
                                    guidance_scale=guidance,
                                    negative_prompt=neg_prompt if neg_prompt.strip() else None,
                                    generator=gen,
                                )
                                struct_img = cn_out.images[0].resize((w_try, h_try))
                                i2i = get_img2img()
                                init_ref = ref_images[style_idxs[min(b, len(style_idxs)-1)]-1].resize((w_try, h_try))
                                blend = Image.blend(init_ref, struct_img, 0.5)
                                out = i2i(
                                    prompt=final_prompt,
                                    image=blend,
                                    strength=img2img_strength,
                                    num_inference_steps=s_try,
                                    guidance_scale=guidance,
                                    negative_prompt=neg_prompt if neg_prompt.strip() else None,
                                    generator=gen,
                                )
                                img = out.images[0]

                        elif mode == "CN":
                            # ControlNet only
                            cn_pipe = get_cn(cn_type)
                            out = cn_pipe(
                                prompt=final_prompt,
                                image=[im for im in cn_images],
                                controlnet_conditioning_scale=cn_scale,
                                num_inference_steps=s_try,
                                guidance_scale=guidance,
                                negative_prompt=neg_prompt if neg_prompt.strip() else None,
                                generator=gen,
                            )
                            img = out.images[0]

                        elif mode == "I2I":
                            # Img2Img only
                            i2i = get_img2img()
                            init_ref = ref_images[style_idxs[min(b, len(style_idxs)-1)]-1].resize((w_try, h_try))
                            out = i2i(
                                prompt=final_prompt,
                                image=init_ref,
                                strength=img2img_strength,
                                num_inference_steps=s_try,
                                guidance_scale=guidance,
                                negative_prompt=neg_prompt if neg_prompt.strip() else None,
                                generator=gen,
                            )
                            img = out.images[0]

                        else:
                            # Text-to-Image
                            kwargs = dict(
                                prompt=final_prompt,
                                num_inference_steps=s_try,
                                guidance_scale=guidance,
                                negative_prompt=neg_prompt if neg_prompt.strip() else None,
                                generator=gen,
                            )
                            if not (model_choice.startswith("SDXL") and HAS_SDXL):
                                kwargs.update({"height": h_try, "width": w_try})
                            out = pipe(**kwargs)
                            img = out.images[0]

                    # Optional upscaling
                    upscaled = "none"
                    if upsample_x2 and HAS_UPSCALER:
                        img, did_upscale, upscaled = upsample_if_any(img)

                    # Save image
                    fname = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{mode}_{w_try}x{h_try}_s{s_try}_g{guidance}_seed{seed_eff}.png"
                    path = save_image(img, fname)
                    st.image(img, caption=fname, use_container_width=True)
                    paths.append(path)
                    images.append(img)

                    # Log generation
                    rows.append({
                        "filepath": path,
                        "prompt": final_prompt,
                        "neg_prompt": neg_prompt,
                        "steps": s_try,
                        "guidance": guidance,
                        "mode": mode,
                        "seed": seed_eff,
                        "width": w_try,
                        "height": h_try,
                        "model": model_id,
                        "img2img_strength": img2img_strength if mode in ["I2I", "CN+I2I"] else "",
                        "cn_type": cn_type if mode in ["CN", "CN+I2I"] else "",
                        "cn_scale": cn_scale if mode in ["CN", "CN+I2I"] else "",
                        "upscaled": upscaled,
                        "timestamp": datetime.now().isoformat()
                    })
                    ok = True
                    break

                except RuntimeError as e:
                    if "out of memory" in str(e).lower() and oom_retry and DEVICE == "cuda":
                        torch.cuda.empty_cache()
                        st.warning("CUDA OOM — retrying at smaller size/steps…")
                        continue
                    else:
                        st.error(f"Runtime error: {e}")
                        last_err = str(e)
                        break
                except Exception as e:
                    st.error(f"Error: {e}")
                    last_err = str(e)
                    break

            if not ok and last_err:
                st.error(f"Failed item {b+1}: {last_err}")

        # Save results
        if rows:
            # Write unified run log (3.B/3.C/3.E compatible)
            log_rows(rows, RUNLOG)
            st.success(f"Saved {len(rows)} image(s). Run log updated: {RUNLOG}")

# ==================== GALLERY & ANNOTATE TAB (Phase 3.D) ====================

with tab_gallery:
    st.subheader("🖼️ Gallery & Filters")

    # Helper functions for Phase 3.D workflow management
    def read_logs():
        """Read and merge all log files"""
        frames = []
        for p in [RUNLOG, RUNLOG_3C, RUNLOG_3E]:
            if Path(p).exists():
                try:
                    df = pd.read_csv(p)
                    df["source_log"] = Path(p).name
                    frames.append(df)
                except Exception as e:
                    st.warning(f"Failed reading {p}: {e}")
        if not frames:
            return pd.DataFrame(columns=["filepath"])
        return pd.concat(frames, ignore_index=True).drop_duplicates(subset=["filepath"])

    def scan_images():
        """Scan output directory for images"""
        rows = [{"filepath": str(p), "filename": p.name} for p in OUTPUT_DIR.glob("*.png")]
        return pd.DataFrame(rows)

    def load_annotations():
        """Load existing annotations"""
        if ANNOT_CSV.exists():
            try:
                return pd.read_csv(ANNOT_CSV)
            except Exception:
                pass
        return pd.DataFrame(columns=["filepath", "rating", "tags", "notes"])

    def save_annotations(df):
        """Save annotations to CSV"""
        df.to_csv(ANNOT_CSV, index=False)

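    # The left-merge keeps every scanned image even if it has no log entry;
    # missing metadata simply shows up as NaN in the gallery filters.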
    # Load data
    imgs_df = scan_images()
    logs_df = read_logs()
    ann_df = load_annotations()
    meta_df = imgs_df.merge(logs_df, on="filepath", how="left")

    if meta_df.empty:
        st.info("No images found in outputs/. Generate some images first.")
    else:
        # Filtering controls
        st.markdown("### 🔍 Filter Images")
        colf1, colf2, colf3 = st.columns(3)

        with colf1:
            mode_opt = ["(all)"] + sorted([m for m in meta_df.get("mode", pd.Series([])).dropna().unique()])
            sel_mode = st.selectbox("Filter by mode", mode_opt, index=0)

        with colf2:
            prompt_filter = st.text_input("Filter prompt contains", "")

        with colf3:
            min_steps = st.number_input("Min steps", 0, 200, 0, 1)

        # Apply filters
        filtered = meta_df.copy()
        if sel_mode != "(all)" and "mode" in filtered.columns:
            filtered = filtered[filtered["mode"] == sel_mode]
        if prompt_filter.strip() and "prompt" in filtered.columns:
            filtered = filtered[filtered["prompt"].fillna("").str.contains(prompt_filter, case=False)]
        if "steps" in filtered.columns:
            try:
                filtered = filtered[pd.to_numeric(filtered["steps"], errors="coerce").fillna(0) >= min_steps]
            except Exception:
                pass

        st.caption(f"{len(filtered)} image(s) match filters.")

        # Display gallery
        if not filtered.empty:
            st.markdown("### 🖼️ Image Gallery")
            cols = st.columns(4)
            for i, row in filtered.reset_index(drop=True).iterrows():
                with cols[i % 4]:
                    p = row["filepath"]
                    try:
                        st.image(p, use_container_width=True, caption=os.path.basename(p))
                    except Exception:
                        st.write(os.path.basename(p))
                    if "prompt" in row and pd.notna(row["prompt"]):
                        st.caption(row["prompt"][:120])

        # Annotation system
        st.markdown("---")
        st.subheader("✍️ Annotate / Rate / Tag")
        choose = st.multiselect("Pick images to annotate", meta_df["filepath"].tolist())

        if choose:
            for path in choose:
                st.markdown("---")
                st.write(f"**{os.path.basename(path)}**")
                try:
                    st.image(path, width=320)
                except Exception:
                    pass

                # Get existing annotation values
                prev = ann_df[ann_df["filepath"] == path]
                rating_val = int(prev.iloc[0]["rating"]) if not prev.empty and not pd.isna(prev.iloc[0]["rating"]) else 3
                tags_val = prev.iloc[0]["tags"] if not prev.empty else ""
                notes_val = prev.iloc[0]["notes"] if not prev.empty else ""

                # Annotation controls
                colE1, colE2, colE3 = st.columns([1, 1, 2])
                with colE1:
                    rating = st.slider(
                        f"Rating {os.path.basename(path)}",
                        1, 5, rating_val, 1,
                        key=f"rate_{path}"
                    )
                with colE2:
                    tags = st.text_input("Tags", tags_val, key=f"tags_{path}")
                with colE3:
                    notes = st.text_area("Notes", notes_val, key=f"notes_{path}")

                # Update annotations dataframe
                if (ann_df["filepath"] == path).any():
                    ann_df.loc[ann_df["filepath"] == path, ["rating", "tags", "notes"]] = [rating, tags, notes]
                else:
                    ann_df.loc[len(ann_df)] = [path, rating, tags, notes]

            if st.button("💾 Save annotations", use_container_width=True):
                save_annotations(ann_df)
                st.success("Annotations saved!")
        else:
            st.info("Select images above to annotate them.")

# ==================== PRESETS TAB (Phase 3.D) ====================

with tab_presets:
    st.subheader("💾 Create / Save / Load Presets")

    # Preset creation
    st.markdown("### 🎛️ Create New Preset")
    colP1, colP2 = st.columns(2)

    with colP1:
        preset_name = st.text_input("Preset name", "my_style", key="preset_name_input")
        p_prompt = st.text_input("Prompt", main_prompt or "A serene cyberpunk alley at dawn", key="preset_prompt_input")
        p_style = st.text_input("Style", style or "digital painting", key="preset_style_input")
        p_mood = st.text_input("Mood", mood or ", ".join(MOOD_OPTIONS[:2]), key="preset_mood_input")
        p_neg = st.text_input("Negative", neg_prompt or "", key="preset_neg_input")

    with colP2:
        p_steps = st.number_input("Steps", 10, 100, steps or 30, 1, key="preset_steps_input")
        p_guid = st.number_input("Guidance", 1.0, 20.0, guidance or 7.5, 0.5, key="preset_guidance_input")
        p_i2i = st.slider("img2img strength", 0.2, 0.9, 0.55, 0.05, key="preset_i2i_slider")
        p_cn_type = st.selectbox("ControlNet type", ["Canny", "Depth"], key="preset_cn_type_selectbox")
        p_cn_scale = st.slider("ControlNet scale", 0.1, 2.0, 1.0, 0.05, key="preset_cn_scale_slider")

    # Build preset object
    preset = {
        "name": preset_name,
        "prompt": p_prompt,
        "style": p_style,
        "mood": p_mood,
        "negative": p_neg,
        "steps": p_steps,
        "guidance": p_guid,
        "img2img_strength": p_i2i,
        "controlnet": {"type": p_cn_type, "scale": p_cn_scale},
        "created_at": datetime.now().isoformat()
    }

    st.markdown("### 📋 Preset Preview")
    st.code(json.dumps(preset, indent=2), language="json")

    # Save/Load controls
    colPS1, colPS2 = st.columns(2)

    with colPS1:
        st.markdown("### 💾 Save Preset")
        if st.button("💾 Save preset", use_container_width=True, key="save_preset_button"):
            if preset_name.strip():
                fp = PRESETS_DIR / f"{preset_name}.json"
                with open(fp, "w", encoding="utf-8") as f:
                    json.dump(preset, f, indent=2)
                st.success(f"Saved {fp}")
            else:
                st.error("Please enter a preset name")

    with colPS2:
        st.markdown("### 📂 Load Preset")
        existing = sorted([p.name for p in PRESETS_DIR.glob("*.json")])
        if existing:
            sel = st.selectbox("Load preset", ["(choose)"] + existing, key="load_preset_selectbox")
            if sel != "(choose)":
                with open(PRESETS_DIR / sel, "r", encoding="utf-8") as f:
                    loaded = json.load(f)
                st.success(f"Loaded {sel}")
                st.code(json.dumps(loaded, indent=2), language="json")
        else:
            st.info("No presets found. Create your first preset above!")

# ==================== EXPORT TAB (Phase 3.D) ====================

with tab_export:
    st.subheader("📦 Export Bundle (ZIP)")

    # Helper functions for export
    def read_logs_all():
        """Read all logs for export"""
        frames = []
        for p in [RUNLOG, RUNLOG_3C, RUNLOG_3E]:
            if Path(p).exists():
                try:
                    df = pd.read_csv(p)
                    df["source_log"] = Path(p).name
                    frames.append(df)
                except Exception as e:
                    st.warning(f"Read fail {p}: {e}")
        if not frames:
            return pd.DataFrame(columns=["filepath"])
        return pd.concat(frames, ignore_index=True).drop_duplicates(subset=["filepath"])

    def scan_imgs():
        """Scan images for export"""
        return pd.DataFrame([
            {"filepath": str(p), "filename": p.name}
            for p in OUTPUT_DIR.glob("*.png")
        ])

    # Load export data
    imgs_df = scan_imgs()
    logs_df = read_logs_all()

    if imgs_df.empty:
        st.info("No images to export yet. Generate some images first.")
    else:
        meta_df = imgs_df.merge(logs_df, on="filepath", how="left")

        # Display available images
        st.markdown("### 📋 Available Images")
        display_cols = ["filepath", "prompt", "mode", "steps", "guidance"]
        available_cols = [col for col in display_cols if col in meta_df.columns]
        st.dataframe(
            meta_df[available_cols].fillna("").astype(str),
            use_container_width=True,
            height=240
        )

        # Export selection
        st.markdown("### 🎯 Export Selection")
        sel = st.multiselect(
            "Select images to export",
            meta_df["filepath"].tolist(),
            default=meta_df["filepath"].tolist()[:8],
            key="export_images_multiselect"
        )

        # Preset inclusion
        include_preset = st.checkbox("Include preset.json", value=False, key="include_preset_checkbox")
        preset_blob = None
        if include_preset:
            ex = sorted([p.name for p in PRESETS_DIR.glob("*.json")])
            if ex:
                choose = st.selectbox("Choose preset", ex, key="export_preset_selectbox")
                with open(PRESETS_DIR / choose, "r", encoding="utf-8") as f:
                    preset_blob = json.load(f)
            else:
                st.warning("No presets found in /presets")
                include_preset = False

        # Bundle configuration
        bundle_name = st.text_input(
            "Bundle name (no spaces)",
            f"compi_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}",
            key="bundle_name_input"
        )

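        # Bundle layout produced below: images/ (selected PNGs), metadata.csv,
        # annotations.csv, manifest.json, optional preset.json, and README.md,
        # zipped together as <bundle_name>.zip under EXPORTS_DIR.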
        # Create export bundle
        if st.button("📦 Create Export Bundle", type="primary", use_container_width=True, key="create_bundle_button"):
            if not sel:
                st.error("Pick at least one image.")
            elif not bundle_name.strip():
                st.error("Please enter a bundle name.")
            else:
                with st.spinner("Creating export bundle..."):
                    # Create temporary directory
                    tmp_dir = EXPORTS_DIR / bundle_name
                    if tmp_dir.exists():
                        shutil.rmtree(tmp_dir)
                    (tmp_dir / "images").mkdir(parents=True, exist_ok=True)

                    # Copy images
                    for p in sel:
                        try:
                            shutil.copy2(p, tmp_dir / "images" / os.path.basename(p))
                        except Exception as e:
                            st.warning(f"Copy failed: {p} ({e})")

                    # Export metadata
                    msel = meta_df[meta_df["filepath"].isin(sel)].copy()
                    msel.to_csv(tmp_dir / "metadata.csv", index=False)

                    # Export annotations
                    if ANNOT_CSV.exists():
                        shutil.copy2(ANNOT_CSV, tmp_dir / "annotations.csv")
                    else:
                        pd.DataFrame(columns=["filepath", "rating", "tags", "notes"]).to_csv(
                            tmp_dir / "annotations.csv", index=False
                        )

                    # Create manifest
                    manifest = {
                        "bundle_name": bundle_name,
                        "created_at": datetime.now().isoformat(),
                        "environment": env_snapshot(),
                        "includes": {
                            "images": True,
                            "metadata_csv": True,
                            "annotations_csv": True,
                            "preset_json": bool(preset_blob),
                            "readme_md": True
                        }
                    }
                    with open(tmp_dir / "manifest.json", "w", encoding="utf-8") as f:
                        json.dump(manifest, f, indent=2)

                    # Include preset if specified
                    if preset_blob:
                        with open(tmp_dir / "preset.json", "w", encoding="utf-8") as f:
                            json.dump(preset_blob, f, indent=2)

                    # Create README
                    with open(tmp_dir / "README.md", "w", encoding="utf-8") as f:
                        f.write(mk_readme(manifest, msel))

                    # Create ZIP file
                    zpath = EXPORTS_DIR / f"{bundle_name}.zip"
                    if zpath.exists():
                        zpath.unlink()

                    with zipfile.ZipFile(zpath, 'w', zipfile.ZIP_DEFLATED) as zf:
                        for root, _, files in os.walk(tmp_dir):
                            for file in files:
                                full = Path(root) / file
                                zf.write(full, full.relative_to(tmp_dir))

                    # Cleanup temporary directory
                    shutil.rmtree(tmp_dir, ignore_errors=True)

                    st.success(f"✅ Export created: {zpath}")
                    st.info(f"📁 Bundle size: {zpath.stat().st_size / (1024*1024):.1f} MB")

                    # Provide download link
                    with open(zpath, "rb") as f:
                        st.download_button(
                            label="📥 Download Export Bundle",
                            data=f.read(),
                            file_name=f"{bundle_name}.zip",
                            mime="application/zip",
                            use_container_width=True
                        )

# ==================== FOOTER ====================

st.markdown("---")
st.markdown("""
<div style='text-align: center; color: #666; padding: 20px;'>
<strong>🧪 CompI Phase 3 Final Dashboard</strong><br>
Complete integration of all Phase 3 components (3.A → 3.E)<br>
<em>Multimodal AI Art Generation • Advanced References • Performance Management • Professional Workflow</em>
</div>
""", unsafe_allow_html=True)
src/utils/__init__.py ADDED
@@ -0,0 +1,16 @@
"""
Utility functions for CompI project.
"""

from .logging_utils import setup_logger
from .file_utils import save_image, save_audio, load_config
from .image_utils import ImageProcessor, StyleAnalyzer

__all__ = [
    "setup_logger",
    "save_image",
    "save_audio",
    "load_config",
    "ImageProcessor",
    "StyleAnalyzer"
]
|
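
A quick usage note: because the package re-exports its helpers, downstream code can import them from src.utils directly rather than from the individual submodules. The signatures themselves live in logging_utils.py, file_utils.py, and image_utils.py, which this commit adds but which are not shown at this point in the diff.

    # Equivalent thanks to the re-exports above:
    from src.utils import setup_logger, save_image, ImageProcessor
    # rather than:
    from src.utils.logging_utils import setup_logger
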
src/utils/audio_utils.py
ADDED
@@ -0,0 +1,342 @@
+"""
+Audio processing utilities for CompI Phase 2.A: Audio Input Integration
+
+This module provides comprehensive audio analysis capabilities including:
+- Audio feature extraction (tempo, energy, spectral features)
+- Audio preprocessing and normalization
+- Audio-to-text captioning using OpenAI Whisper
+- Multimodal prompt fusion combining audio features with text prompts
+"""
+
+import os
+import numpy as np
+import librosa
+import soundfile as sf
+from typing import Dict, List, Optional, Tuple, Union
+import logging
+from dataclasses import dataclass
+
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+@dataclass
+class AudioFeatures:
+    """Container for extracted audio features"""
+    tempo: float
+    energy: float  # RMS energy
+    zero_crossing_rate: float
+    spectral_centroid: float
+    spectral_rolloff: float
+    mfcc_mean: np.ndarray
+    chroma_mean: np.ndarray
+    duration: float
+    sample_rate: int
+
+    def to_dict(self) -> Dict:
+        """Convert to dictionary for JSON serialization"""
+        return {
+            'tempo': float(self.tempo),
+            'energy': float(self.energy),
+            'zero_crossing_rate': float(self.zero_crossing_rate),
+            'spectral_centroid': float(self.spectral_centroid),
+            'spectral_rolloff': float(self.spectral_rolloff),
+            'mfcc_mean': self.mfcc_mean.tolist() if hasattr(self.mfcc_mean, 'tolist') else list(self.mfcc_mean),
+            'chroma_mean': self.chroma_mean.tolist() if hasattr(self.chroma_mean, 'tolist') else list(self.chroma_mean),
+            'duration': float(self.duration),
+            'sample_rate': int(self.sample_rate)
+        }
+
+class AudioProcessor:
+    """Comprehensive audio processing and analysis"""
+
+    def __init__(self, target_sr: int = 16000, max_duration: float = 60.0):
+        """
+        Initialize audio processor
+
+        Args:
+            target_sr: Target sample rate for processing
+            max_duration: Maximum audio duration to process (seconds)
+        """
+        self.target_sr = target_sr
+        self.max_duration = max_duration
+
+    def load_audio(self, audio_path: str) -> Tuple[np.ndarray, int]:
+        """
+        Load and preprocess audio file
+
+        Args:
+            audio_path: Path to audio file
+
+        Returns:
+            Tuple of (audio_data, sample_rate)
+        """
+        try:
+            # Load audio with librosa
+            audio, sr = librosa.load(
+                audio_path,
+                sr=self.target_sr,
+                duration=self.max_duration
+            )
+
+            # Normalize audio
+            audio = librosa.util.normalize(audio)
+
+            logger.info(f"Loaded audio: {audio_path}, duration: {len(audio)/sr:.2f}s")
+            return audio, sr
+
+        except Exception as e:
+            logger.error(f"Error loading audio {audio_path}: {e}")
+            raise
+
+    def extract_features(self, audio: np.ndarray, sr: int) -> AudioFeatures:
+        """
+        Extract comprehensive audio features
+
+        Args:
+            audio: Audio signal
+            sr: Sample rate
+
+        Returns:
+            AudioFeatures object containing all extracted features
+        """
+        try:
+            # Basic features
+            duration = len(audio) / sr
+
+            # Tempo and beat tracking
+            tempo, _ = librosa.beat.beat_track(y=audio, sr=sr)
+
+            # Energy (RMS)
+            rms = librosa.feature.rms(y=audio)[0]
+            energy = np.sqrt(np.mean(rms**2))
+
+            # Zero crossing rate
+            zcr = librosa.feature.zero_crossing_rate(audio)[0]
+            zcr_mean = np.mean(zcr)
+
+            # Spectral features
+            spectral_centroids = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
+            spectral_centroid = np.mean(spectral_centroids)
+
+            spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
+            spectral_rolloff_mean = np.mean(spectral_rolloff)
+
+            # MFCC features
+            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
+            mfcc_mean = np.mean(mfccs, axis=1)
+
+            # Chroma features
+            chroma = librosa.feature.chroma_stft(y=audio, sr=sr)
+            chroma_mean = np.mean(chroma, axis=1)
+
+            features = AudioFeatures(
+                tempo=float(tempo),
+                energy=float(energy),
+                zero_crossing_rate=float(zcr_mean),
+                spectral_centroid=float(spectral_centroid),
+                spectral_rolloff=float(spectral_rolloff_mean),
+                mfcc_mean=mfcc_mean,
+                chroma_mean=chroma_mean,
+                duration=float(duration),
+                sample_rate=int(sr)
+            )
+
+            logger.info(f"Extracted features: tempo={float(tempo):.1f}, energy={float(energy):.4f}")
+            return features
+
+        except Exception as e:
+            logger.error(f"Error extracting audio features: {e}")
+            raise
+
+    def analyze_audio_file(self, audio_path: str) -> AudioFeatures:
+        """
+        Complete audio analysis pipeline
+
+        Args:
+            audio_path: Path to audio file
+
+        Returns:
+            AudioFeatures object
+        """
+        audio, sr = self.load_audio(audio_path)
+        return self.extract_features(audio, sr)
+
+class AudioCaptioner:
+    """Audio-to-text captioning using OpenAI Whisper"""
+
+    def __init__(self, model_size: str = "base", device: str = "auto"):
+        """
+        Initialize audio captioner
+
+        Args:
+            model_size: Whisper model size (tiny, base, small, medium, large)
+            device: Device to run on (auto, cpu, cuda)
+        """
+        self.model_size = model_size
+        self.device = device
+        self._model = None
+
+    def _load_model(self):
+        """Lazy load Whisper model"""
+        if self._model is None:
+            try:
+                import whisper
+                self._model = whisper.load_model(self.model_size, device=self.device)
+                logger.info(f"Loaded Whisper model: {self.model_size}")
+            except ImportError:
+                logger.error("OpenAI Whisper not installed. Install with: pip install openai-whisper")
+                raise
+            except Exception as e:
+                logger.error(f"Error loading Whisper model: {e}")
+                raise
+
+    def caption_audio(self, audio_path: str, language: str = "en") -> str:
+        """
+        Generate text caption from audio
+
+        Args:
+            audio_path: Path to audio file
+            language: Language code for transcription
+
+        Returns:
+            Text caption of the audio content
+        """
+        self._load_model()
+
+        try:
+            import whisper
+
+            # Load and preprocess audio for Whisper
+            audio = whisper.load_audio(audio_path)
+            audio = whisper.pad_or_trim(audio)
+
+            # Generate mel spectrogram
+            mel = whisper.log_mel_spectrogram(audio).to(self._model.device)
+
+            # Decode audio
+            options = whisper.DecodingOptions(language=language, fp16=False)
+            result = whisper.decode(self._model, mel, options)
+
+            caption = result.text.strip()
+            logger.info(f"Generated audio caption: '{caption[:50]}...'")
+
+            return caption
+
+        except Exception as e:
+            logger.error(f"Error captioning audio: {e}")
+            return ""
+
+class MultimodalPromptFusion:
+    """Intelligent fusion of text prompts with audio features and captions"""
+
+    def __init__(self):
+        """Initialize prompt fusion system"""
+        pass
+
+    def fuse_prompt_with_audio(
+        self,
+        text_prompt: str,
+        style: str,
+        mood: str,
+        audio_features: AudioFeatures,
+        audio_caption: str = ""
+    ) -> str:
+        """
+        Create enhanced prompt by fusing text with audio analysis
+
+        Args:
+            text_prompt: Original text prompt
+            style: Art style
+            mood: Mood/atmosphere
+            audio_features: Extracted audio features
+            audio_caption: Audio caption from Whisper
+
+        Returns:
+            Enhanced multimodal prompt
+        """
+        # Start with base prompt
+        enhanced_prompt = text_prompt.strip()
+
+        # Add style and mood
+        if style:
+            enhanced_prompt += f", {style}"
+        if mood:
+            enhanced_prompt += f", {mood}"
+
+        # Add audio caption if available
+        if audio_caption:
+            enhanced_prompt += f", inspired by the sound of: {audio_caption}"
+
+        # Add tempo-based descriptors
+        if audio_features.tempo < 80:
+            enhanced_prompt += ", slow and contemplative"
+        elif audio_features.tempo > 140:
+            enhanced_prompt += ", fast-paced and energetic"
+        elif audio_features.tempo > 120:
+            enhanced_prompt += ", upbeat and dynamic"
+
+        # Add energy-based descriptors
+        if audio_features.energy > 0.05:
+            enhanced_prompt += ", vibrant and powerful"
+        elif audio_features.energy < 0.02:
+            enhanced_prompt += ", gentle and subtle"
+
+        # Add rhythm-based descriptors
+        if audio_features.zero_crossing_rate > 0.15:
+            enhanced_prompt += ", rhythmic and percussive"
+
+        # Add tonal descriptors based on spectral features
+        if audio_features.spectral_centroid > 3000:
+            enhanced_prompt += ", bright and crisp"
+        elif audio_features.spectral_centroid < 1500:
+            enhanced_prompt += ", warm and deep"
+
+        logger.info(f"Enhanced prompt: {enhanced_prompt}")
+        return enhanced_prompt
+
+    def generate_audio_tags(self, audio_features: AudioFeatures) -> List[str]:
+        """
+        Generate descriptive tags based on audio features
+
+        Args:
+            audio_features: Extracted audio features
+
+        Returns:
+            List of descriptive tags
+        """
+        tags = []
+
+        # Tempo tags
+        if audio_features.tempo < 60:
+            tags.append("very_slow")
+        elif audio_features.tempo < 90:
+            tags.append("slow")
+        elif audio_features.tempo < 120:
+            tags.append("moderate")
+        elif audio_features.tempo < 140:
+            tags.append("fast")
+        else:
+            tags.append("very_fast")
+
+        # Energy tags
+        if audio_features.energy > 0.06:
+            tags.append("high_energy")
+        elif audio_features.energy > 0.03:
+            tags.append("medium_energy")
+        else:
+            tags.append("low_energy")
+
+        # Rhythm tags
+        if audio_features.zero_crossing_rate > 0.15:
+            tags.append("percussive")
+        elif audio_features.zero_crossing_rate < 0.05:
+            tags.append("smooth")
+
+        # Spectral tags
+        if audio_features.spectral_centroid > 3000:
+            tags.append("bright")
+        elif audio_features.spectral_centroid < 1500:
+            tags.append("dark")
+
+        return tags
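
Taken together, the three classes above form a small pipeline: extract features, optionally caption the clip with Whisper, then fuse everything into one prompt. A minimal sketch, assuming a local file named music.mp3 and that openai-whisper is installed (the captioner returns an empty string on failure, so the fusion step degrades gracefully):

    from src.utils.audio_utils import AudioProcessor, AudioCaptioner, MultimodalPromptFusion

    processor = AudioProcessor(target_sr=16000, max_duration=60.0)
    features = processor.analyze_audio_file("music.mp3")  # hypothetical input file

    captioner = AudioCaptioner(model_size="base", device="cpu")
    caption = captioner.caption_audio("music.mp3")  # "" if Whisper is unavailable

    fusion = MultimodalPromptFusion()
    prompt = fusion.fuse_prompt_with_audio(
        text_prompt="a misty forest at dawn",
        style="oil painting",
        mood="dreamy",
        audio_features=features,
        audio_caption=caption,
    )
    print(prompt)                                # base prompt enriched with audio descriptors
    print(fusion.generate_audio_tags(features))  # e.g. ['moderate', 'low_energy', 'smooth']
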
src/utils/data_utils.py
ADDED
@@ -0,0 +1,654 @@
+"""
+CompI Data Processing Utilities
+
+This module provides utilities for Phase 2.B: Data/Logic Input Integration
+- CSV data analysis and processing
+- Mathematical formula evaluation
+- Data-to-text conversion (poetic descriptions)
+- Data visualization generation
+- Statistical analysis and pattern detection
+"""
+
+import os
+import io
+import ast
+import math
+import numpy as np
+import pandas as pd
+import matplotlib
+matplotlib.use('Agg')  # Use non-interactive backend for Streamlit
+import matplotlib.pyplot as plt
+import seaborn as sns
+from typing import Dict, List, Optional, Tuple, Union, Any
+from dataclasses import dataclass
+from PIL import Image
+import logging
+
+logger = logging.getLogger(__name__)
+
+@dataclass
+class DataFeatures:
+    """Container for extracted data features and statistics"""
+
+    # Basic properties
+    shape: Tuple[int, int]
+    columns: List[str]
+    numeric_columns: List[str]
+    data_types: Dict[str, str]
+
+    # Statistical features
+    means: Dict[str, float]
+    medians: Dict[str, float]
+    stds: Dict[str, float]
+    mins: Dict[str, float]
+    maxs: Dict[str, float]
+    ranges: Dict[str, float]
+
+    # Pattern features
+    trends: Dict[str, str]  # 'increasing', 'decreasing', 'stable', 'volatile'
+    correlations: Dict[str, float]  # strongest correlations
+    seasonality: Dict[str, bool]  # detected patterns
+
+    # Derived insights
+    complexity_score: float  # 0-1 measure of data complexity
+    variability_score: float  # 0-1 measure of data variability
+    pattern_strength: float  # 0-1 measure of detectable patterns
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization"""
+        return {
+            'shape': self.shape,
+            'columns': self.columns,
+            'numeric_columns': self.numeric_columns,
+            'data_types': self.data_types,
+            'means': self.means,
+            'medians': self.medians,
+            'stds': self.stds,
+            'mins': self.mins,
+            'maxs': self.maxs,
+            'ranges': self.ranges,
+            'trends': self.trends,
+            'correlations': self.correlations,
+            'seasonality': self.seasonality,
+            'complexity_score': self.complexity_score,
+            'variability_score': self.variability_score,
+            'pattern_strength': self.pattern_strength
+        }
+
+class DataProcessor:
+    """Core data processing and analysis functionality"""
+
+    def __init__(self):
+        """Initialize the data processor"""
+        self.safe_functions = {
+            # Math functions
+            'abs': abs, 'round': round, 'min': min, 'max': max,
+            'sum': sum, 'len': len, 'pow': pow,
+
+            # NumPy functions
+            'np': np, 'numpy': np,
+            'sin': np.sin, 'cos': np.cos, 'tan': np.tan,
+            'exp': np.exp, 'log': np.log, 'sqrt': np.sqrt,
+            'pi': np.pi, 'e': np.e,
+
+            # Math module functions
+            'math': math,
+
+            # Restricted builtins
+            '__builtins__': {}
+        }
+
+    def analyze_csv_data(self, df: pd.DataFrame) -> DataFeatures:
+        """
+        Comprehensive analysis of CSV data
+
+        Args:
+            df: Input DataFrame
+
+        Returns:
+            DataFeatures object with extracted insights
+        """
+        logger.info(f"Analyzing CSV data with shape {df.shape}")
+
+        # Basic properties
+        shape = df.shape
+        columns = df.columns.tolist()
+        numeric_df = df.select_dtypes(include=[np.number])
+        numeric_columns = numeric_df.columns.tolist()
+        data_types = {col: str(df[col].dtype) for col in columns}
+
+        # Statistical features
+        means = {col: float(numeric_df[col].mean()) for col in numeric_columns}
+        medians = {col: float(numeric_df[col].median()) for col in numeric_columns}
+        stds = {col: float(numeric_df[col].std()) for col in numeric_columns}
+        mins = {col: float(numeric_df[col].min()) for col in numeric_columns}
+        maxs = {col: float(numeric_df[col].max()) for col in numeric_columns}
+        ranges = {col: maxs[col] - mins[col] for col in numeric_columns}
+
+        # Pattern analysis
+        trends = self._analyze_trends(numeric_df)
+        correlations = self._find_strongest_correlations(numeric_df)
+        seasonality = self._detect_seasonality(numeric_df)
+
+        # Derived scores
+        complexity_score = self._calculate_complexity_score(numeric_df)
+        variability_score = self._calculate_variability_score(stds, ranges)
+        pattern_strength = self._calculate_pattern_strength(trends, correlations)
+
+        return DataFeatures(
+            shape=shape,
+            columns=columns,
+            numeric_columns=numeric_columns,
+            data_types=data_types,
+            means=means,
+            medians=medians,
+            stds=stds,
+            mins=mins,
+            maxs=maxs,
+            ranges=ranges,
+            trends=trends,
+            correlations=correlations,
+            seasonality=seasonality,
+            complexity_score=complexity_score,
+            variability_score=variability_score,
+            pattern_strength=pattern_strength
+        )
+
+    def evaluate_formula(self, formula: str, num_points: int = 100) -> Tuple[np.ndarray, Dict[str, Any]]:
+        """
+        Safely evaluate mathematical formula
+
+        Args:
+            formula: Mathematical expression (Python/NumPy syntax)
+            num_points: Number of points to generate
+
+        Returns:
+            Tuple of (result_array, metadata)
+        """
+        logger.info(f"Evaluating formula: {formula}")
+
+        try:
+            # Create default x values if not specified in formula
+            if 'x' in formula and 'linspace' not in formula and 'arange' not in formula:
+                # Add default x range if x is used but not defined
+                x = np.linspace(0, 10, num_points)
+                self.safe_functions['x'] = x
+
+            # Evaluate the formula
+            result = eval(formula, self.safe_functions)
+
+            # Ensure result is a numpy array
+            if not isinstance(result, np.ndarray):
+                if isinstance(result, (list, tuple)):
+                    result = np.array(result)
+                else:
+                    # Single value - create array
+                    result = np.full(num_points, result)
+
+            # Analyze the result
+            metadata = {
+                'length': len(result),
+                'min': float(np.min(result)),
+                'max': float(np.max(result)),
+                'mean': float(np.mean(result)),
+                'std': float(np.std(result)),
+                'range': float(np.max(result) - np.min(result)),
+                'formula': formula,
+                'has_pattern': self._detect_mathematical_pattern(result)
+            }
+
+            return result, metadata
+
+        except Exception as e:
+            logger.error(f"Formula evaluation failed: {e}")
+            raise ValueError(f"Invalid formula: {e}")
+
+    def _analyze_trends(self, df: pd.DataFrame) -> Dict[str, str]:
+        """Analyze trends in numeric columns"""
+        trends = {}
+        for col in df.columns:
+            values = df[col].dropna()
+            if len(values) < 3:
+                trends[col] = 'insufficient_data'
+                continue
+
+            # Calculate trend using linear regression slope
+            x = np.arange(len(values))
+            slope = np.polyfit(x, values, 1)[0]
+            std_val = values.std()
+
+            if abs(slope) < std_val * 0.1:
+                trends[col] = 'stable'
+            elif std_val > values.mean() * 0.5:
+                trends[col] = 'volatile'
+            elif slope > 0:
+                trends[col] = 'increasing'
+            else:
+                trends[col] = 'decreasing'
+
+        return trends
+
+    def _find_strongest_correlations(self, df: pd.DataFrame) -> Dict[str, float]:
+        """Find strongest correlations between columns"""
+        if len(df.columns) < 2:
+            return {}
+
+        corr_matrix = df.corr()
+        correlations = {}
+
+        for i, col1 in enumerate(df.columns):
+            for j, col2 in enumerate(df.columns):
+                if i < j:  # Avoid duplicates and self-correlation
+                    corr_val = corr_matrix.loc[col1, col2]
+                    if not np.isnan(corr_val):
+                        correlations[f"{col1}_vs_{col2}"] = float(corr_val)
+
+        # Return top 3 strongest correlations
+        sorted_corr = sorted(correlations.items(), key=lambda x: abs(x[1]), reverse=True)
+        return dict(sorted_corr[:3])
+
+    def _detect_seasonality(self, df: pd.DataFrame) -> Dict[str, bool]:
+        """Simple seasonality detection"""
+        seasonality = {}
+        for col in df.columns:
+            values = df[col].dropna()
+            if len(values) < 12:  # Need at least 12 points for seasonality
+                seasonality[col] = False
+                continue
+
+            # Simple autocorrelation check
+            try:
+                autocorr = np.corrcoef(values[:-1], values[1:])[0, 1]
+                seasonality[col] = not np.isnan(autocorr) and abs(autocorr) > 0.3
+            except:
+                seasonality[col] = False
+
+        return seasonality
+
+    def _calculate_complexity_score(self, df: pd.DataFrame) -> float:
+        """Calculate data complexity score (0-1)"""
+        if df.empty:
+            return 0.0
+
+        # Factors: number of columns, data types variety, missing values
+        num_cols = len(df.columns)
+        col_score = min(num_cols / 10, 1.0)  # Normalize to 0-1
+
+        # Missing data complexity
+        missing_ratio = df.isnull().sum().sum() / (df.shape[0] * df.shape[1])
+        missing_score = min(missing_ratio * 2, 1.0)
+
+        return (col_score + missing_score) / 2
+
+    def _calculate_variability_score(self, stds: Dict[str, float], ranges: Dict[str, float]) -> float:
+        """Calculate data variability score (0-1)"""
+        if not stds:
+            return 0.0
+
+        # Normalize standard deviations by their ranges
+        normalized_vars = []
+        for col in stds:
+            if ranges[col] > 0:
+                normalized_vars.append(stds[col] / ranges[col])
+
+        if not normalized_vars:
+            return 0.0
+
+        return min(np.mean(normalized_vars) * 2, 1.0)
+
+    def _calculate_pattern_strength(self, trends: Dict[str, str], correlations: Dict[str, float]) -> float:
+        """Calculate pattern strength score (0-1)"""
+        pattern_score = 0.0
+
+        # Trend strength
+        trend_patterns = sum(1 for trend in trends.values() if trend in ['increasing', 'decreasing'])
+        trend_score = min(trend_patterns / max(len(trends), 1), 1.0)
+
+        # Correlation strength
+        if correlations:
+            max_corr = max(abs(corr) for corr in correlations.values())
+            corr_score = max_corr
+        else:
+            corr_score = 0.0
+
+        return (trend_score + corr_score) / 2
+
+    def _detect_mathematical_pattern(self, data: np.ndarray) -> bool:
+        """Detect if mathematical data has recognizable patterns"""
+        if len(data) < 10:
+            return False
+
+        # Check for periodicity using autocorrelation
+        try:
+            # Simple pattern detection
+            autocorr = np.corrcoef(data[:-1], data[1:])[0, 1]
+            return not np.isnan(autocorr) and abs(autocorr) > 0.5
+        except:
+            return False
+
+
+class DataToTextConverter:
+    """Convert data patterns into poetic/narrative text descriptions"""
+
+    def __init__(self):
+        """Initialize the converter with descriptive vocabularies"""
+        self.trend_descriptions = {
+            'increasing': ['ascending', 'rising', 'climbing', 'growing', 'soaring'],
+            'decreasing': ['descending', 'falling', 'declining', 'diminishing', 'fading'],
+            'stable': ['steady', 'constant', 'balanced', 'harmonious', 'peaceful'],
+            'volatile': ['chaotic', 'turbulent', 'dynamic', 'energetic', 'wild']
+        }
+
+        self.pattern_adjectives = {
+            'high_complexity': ['intricate', 'complex', 'sophisticated', 'elaborate'],
+            'low_complexity': ['simple', 'pure', 'minimal', 'clean'],
+            'high_variability': ['diverse', 'varied', 'rich', 'multifaceted'],
+            'low_variability': ['consistent', 'uniform', 'regular', 'predictable'],
+            'strong_patterns': ['rhythmic', 'structured', 'organized', 'patterned'],
+            'weak_patterns': ['random', 'scattered', 'free-flowing', 'organic']
+        }
+
+        self.artistic_metaphors = [
+            'like brushstrokes on a canvas',
+            'resembling musical notes in harmony',
+            'flowing like water through landscapes',
+            'dancing with mathematical precision',
+            'weaving patterns of light and shadow',
+            'creating symphonies of numbers',
+            'painting stories with data points',
+            'sculpting meaning from statistics'
+        ]
+
+    def generate_poetic_description(self, features: DataFeatures) -> str:
+        """
+        Generate poetic description from data features
+
+        Args:
+            features: DataFeatures object
+
+        Returns:
+            Poetic text description
+        """
+        descriptions = []
+
+        # Basic data description
+        descriptions.append(f"A tapestry woven from {features.shape[0]} data points across {features.shape[1]} dimensions")
+
+        # Trend descriptions
+        trend_desc = self._describe_trends(features.trends)
+        if trend_desc:
+            descriptions.append(trend_desc)
+
+        # Variability description
+        var_desc = self._describe_variability(features.variability_score)
+        if var_desc:
+            descriptions.append(var_desc)
+
+        # Pattern description
+        pattern_desc = self._describe_patterns(features.pattern_strength, features.correlations)
+        if pattern_desc:
+            descriptions.append(pattern_desc)
+
+        # Add artistic metaphor
+        import random
+        metaphor = random.choice(self.artistic_metaphors)
+        descriptions.append(f"The data flows {metaphor}")
+
+        return '. '.join(descriptions) + '.'
+
+    def generate_formula_description(self, formula: str, metadata: Dict[str, Any]) -> str:
+        """
+        Generate poetic description for mathematical formula
+
+        Args:
+            formula: Original formula
+            metadata: Formula evaluation metadata
+
+        Returns:
+            Poetic text description
+        """
+        descriptions = []
+
+        # Formula introduction
+        descriptions.append(f"Mathematical harmony emerges from the expression: {formula}")
+
+        # Range description
+        range_val = metadata['range']
+        if range_val > 10:
+            descriptions.append("The function soars across vast numerical landscapes")
+        elif range_val > 1:
+            descriptions.append("Values dance within moderate bounds")
+        else:
+            descriptions.append("Numbers whisper in gentle, subtle variations")
+
+        # Pattern description
+        if metadata['has_pattern']:
+            descriptions.append("Revealing intricate patterns that speak to the soul")
+        else:
+            descriptions.append("Creating unique, unrepeatable mathematical poetry")
+
+        # Add artistic metaphor
+        import random
+        metaphor = random.choice(self.artistic_metaphors)
+        descriptions.append(f"Each calculation {metaphor}")
+
+        return '. '.join(descriptions) + '.'
+
+    def _describe_trends(self, trends: Dict[str, str]) -> str:
+        """Describe overall trends in the data"""
+        if not trends:
+            return ""
+
+        trend_counts = {}
+        for trend in trends.values():
+            trend_counts[trend] = trend_counts.get(trend, 0) + 1
+
+        dominant_trend = max(trend_counts, key=trend_counts.get)
+
+        if dominant_trend in self.trend_descriptions:
+            import random
+            adj = random.choice(self.trend_descriptions[dominant_trend])
+            return f"The data reveals {adj} patterns throughout its structure"
+
+        return ""
+
+    def _describe_variability(self, variability_score: float) -> str:
+        """Describe data variability"""
+        import random
+
+        if variability_score > 0.7:
+            adj = random.choice(self.pattern_adjectives['high_variability'])
+            return f"With {adj} expressions of numerical diversity"
+        elif variability_score < 0.3:
+            adj = random.choice(self.pattern_adjectives['low_variability'])
+            return f"Maintaining {adj} elegance in its values"
+        else:
+            return "Balancing consistency with creative variation"
+
+    def _describe_patterns(self, pattern_strength: float, correlations: Dict[str, float]) -> str:
+        """Describe pattern strength and correlations"""
+        import random
+
+        if pattern_strength > 0.6:
+            adj = random.choice(self.pattern_adjectives['strong_patterns'])
+            return f"Displaying {adj} relationships between its elements"
+        elif pattern_strength < 0.3:
+            adj = random.choice(self.pattern_adjectives['weak_patterns'])
+            return f"Embracing {adj} freedom in its numerical expression"
+        else:
+            return "Weaving subtle connections throughout its numerical fabric"
+
+
+class DataVisualizer:
+    """Create visualizations from data for artistic conditioning"""
+
+    def __init__(self, style: str = 'artistic'):
+        """
+        Initialize visualizer
+
+        Args:
+            style: Visualization style ('artistic', 'scientific', 'minimal')
+        """
+        self.style = style
+        self.color_palettes = {
+            'artistic': ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7'],
+            'scientific': ['#2E86AB', '#A23B72', '#F18F01', '#C73E1D', '#592E83'],
+            'minimal': ['#2C3E50', '#34495E', '#7F8C8D', '#95A5A6', '#BDC3C7']
+        }
+
+    def create_data_visualization(self, df: pd.DataFrame, features: DataFeatures) -> Image.Image:
+        """
+        Create artistic visualization from DataFrame
+
+        Args:
+            df: Input DataFrame
+            features: DataFeatures object
+
+        Returns:
+            PIL Image of the visualization
+        """
+        plt.style.use('default')
+        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
+        fig.suptitle('Data Pattern Visualization', fontsize=16, fontweight='bold')
+
+        numeric_df = df.select_dtypes(include=[np.number])
+        colors = self.color_palettes[self.style]
+
+        # Plot 1: Line plot of first few columns
+        ax1 = axes[0, 0]
+        for i, col in enumerate(numeric_df.columns[:3]):
+            ax1.plot(numeric_df[col], color=colors[i % len(colors)],
+                     linewidth=2, alpha=0.8, label=col)
+        ax1.set_title('Data Trends', fontweight='bold')
+        ax1.legend()
+        ax1.grid(True, alpha=0.3)
+
+        # Plot 2: Distribution/histogram
+        ax2 = axes[0, 1]
+        if len(numeric_df.columns) > 0:
+            col = numeric_df.columns[0]
+            ax2.hist(numeric_df[col].dropna(), bins=20, color=colors[0],
+                     alpha=0.7, edgecolor='black')
+            ax2.set_title(f'Distribution: {col}', fontweight='bold')
+            ax2.grid(True, alpha=0.3)
+
+        # Plot 3: Correlation heatmap (if multiple columns)
+        ax3 = axes[1, 0]
+        if len(numeric_df.columns) > 1:
+            corr_matrix = numeric_df.corr()
+            im = ax3.imshow(corr_matrix, cmap='RdBu_r', aspect='auto', vmin=-1, vmax=1)
+            ax3.set_xticks(range(len(corr_matrix.columns)))
+            ax3.set_yticks(range(len(corr_matrix.columns)))
+            ax3.set_xticklabels(corr_matrix.columns, rotation=45)
+            ax3.set_yticklabels(corr_matrix.columns)
+            ax3.set_title('Correlations', fontweight='bold')
+            plt.colorbar(im, ax=ax3, shrink=0.8)
+        else:
+            ax3.text(0.5, 0.5, 'Single Column\nNo Correlations',
+                     ha='center', va='center', transform=ax3.transAxes)
+            ax3.set_title('Correlations', fontweight='bold')
+
+        # Plot 4: Summary statistics
+        ax4 = axes[1, 1]
+        if len(numeric_df.columns) > 0:
+            stats_data = [features.means[col] for col in numeric_df.columns[:5]]
+            bars = ax4.bar(range(len(stats_data)), stats_data, color=colors[:len(stats_data)])
+            ax4.set_title('Mean Values', fontweight='bold')
+            ax4.set_xticks(range(len(stats_data)))
+            ax4.set_xticklabels([col[:8] for col in numeric_df.columns[:5]], rotation=45)
+            ax4.grid(True, alpha=0.3)
+
+        plt.tight_layout()
+
+        # Convert to PIL Image
+        buf = io.BytesIO()
+        plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
+        plt.close()
+        buf.seek(0)
+
+        return Image.open(buf)
+
+    def create_formula_visualization(self, data: np.ndarray, formula: str, metadata: Dict[str, Any]) -> Image.Image:
+        """
+        Create artistic visualization from formula result
+
+        Args:
+            data: Formula result array
+            formula: Original formula
+            metadata: Formula metadata
+
+        Returns:
+            PIL Image of the visualization
+        """
+        try:
+            logger.info(f"Creating visualization for formula: {formula}")
+            logger.info(f"Data shape: {data.shape}, Data range: [{np.min(data):.3f}, {np.max(data):.3f}]")
+
+            plt.style.use('default')
+            fig, axes = plt.subplots(2, 2, figsize=(12, 10))
+            fig.suptitle(f'Mathematical Pattern: {formula}', fontsize=14, fontweight='bold')
+
+            colors = self.color_palettes[self.style]
+            x = np.arange(len(data))
+
+            # Plot 1: Main function plot
+            ax1 = axes[0, 0]
+            ax1.plot(x, data, color=colors[0], linewidth=3, alpha=0.8)
+            ax1.fill_between(x, data, alpha=0.3, color=colors[0])
+            ax1.set_title('Function Values', fontweight='bold')
+            ax1.grid(True, alpha=0.3)
+
+            # Plot 2: Derivative approximation
+            ax2 = axes[0, 1]
+            if len(data) > 1:
+                derivative = np.gradient(data)
+                ax2.plot(x, derivative, color=colors[1], linewidth=2)
+                ax2.set_title('Rate of Change', fontweight='bold')
+                ax2.grid(True, alpha=0.3)
+
+            # Plot 3: Distribution
+            ax3 = axes[1, 0]
+            ax3.hist(data, bins=30, color=colors[2], alpha=0.7, edgecolor='black')
+            ax3.set_title('Value Distribution', fontweight='bold')
+            ax3.grid(True, alpha=0.3)
+
+            # Plot 4: Phase space (if applicable)
+            ax4 = axes[1, 1]
+            if len(data) > 1:
+                ax4.scatter(data[:-1], data[1:], c=x[:-1], cmap='viridis', alpha=0.6)
+                ax4.set_xlabel('f(t)')
+                ax4.set_ylabel('f(t+1)')
+                ax4.set_title('Phase Space', fontweight='bold')
+                ax4.grid(True, alpha=0.3)
+
+            plt.tight_layout()
+
+            # Convert to PIL Image
+            buf = io.BytesIO()
+            plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
+            plt.close()
+            buf.seek(0)
+
+            image = Image.open(buf)
+            logger.info(f"Successfully created visualization image: {image.size}")
+            return image
+
+        except Exception as e:
+            logger.error(f"Error creating formula visualization: {e}")
+            plt.close('all')  # Clean up any open figures
+
+            # Return a simple error image
+            fig, ax = plt.subplots(figsize=(8, 6))
+            ax.text(0.5, 0.5, f'Visualization Error:\n{str(e)}',
+                    ha='center', va='center', fontsize=12,
+                    bbox=dict(boxstyle="round,pad=0.3", facecolor="lightcoral"))
+            ax.set_xlim(0, 1)
+            ax.set_ylim(0, 1)
+            ax.axis('off')
+
+            buf = io.BytesIO()
+            plt.savefig(buf, format='png', dpi=150, bbox_inches='tight')
+            plt.close()
+            buf.seek(0)
+
+            return Image.open(buf)
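
As an illustration of how these three classes chain together, a sketch that turns a formula into an array, a poetic description, and a conditioning image, using only the APIs defined above (the output filename is a placeholder):

    from src.utils.data_utils import DataProcessor, DataToTextConverter, DataVisualizer

    formula = "np.sin(x) * np.exp(-0.1 * x)"  # 'x' is injected as np.linspace(0, 10, num_points)

    proc = DataProcessor()
    data, meta = proc.evaluate_formula(formula, num_points=200)

    text = DataToTextConverter().generate_formula_description(formula, meta)
    image = DataVisualizer(style="artistic").create_formula_visualization(data, formula, meta)

    print(text)                        # narrative description built from the metadata
    image.save("formula_pattern.png")  # placeholder output path
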
src/utils/emotion_utils.py
ADDED
@@ -0,0 +1,446 @@
+"""
+CompI Emotion Processing Utilities
+
+This module provides utilities for Phase 2.C: Emotional/Contextual Input Integration
+- Emotion detection and sentiment analysis
+- Mood mapping and emotional context processing
+- Color palette generation based on emotions
+- Contextual prompt enhancement
+- Emoji and text-based emotion recognition
+"""
+
+import re
+import json
+from typing import Dict, List, Optional, Tuple, Union, Any
+from dataclasses import dataclass
+from enum import Enum
+import logging
+
+# Optional imports with fallbacks
+try:
+    from textblob import TextBlob
+    TEXTBLOB_AVAILABLE = True
+except ImportError:
+    TEXTBLOB_AVAILABLE = False
+    TextBlob = None
+
+try:
+    import emoji
+    EMOJI_AVAILABLE = True
+except ImportError:
+    EMOJI_AVAILABLE = False
+    emoji = None
+
+logger = logging.getLogger(__name__)
+
+class EmotionCategory(Enum):
+    """Primary emotion categories"""
+    JOY = "joy"
+    SADNESS = "sadness"
+    ANGER = "anger"
+    FEAR = "fear"
+    SURPRISE = "surprise"
+    DISGUST = "disgust"
+    LOVE = "love"
+    ANTICIPATION = "anticipation"
+    TRUST = "trust"
+    NEUTRAL = "neutral"
+
+@dataclass
+class EmotionAnalysis:
+    """Container for emotion analysis results"""
+
+    # Primary emotion detection
+    primary_emotion: EmotionCategory
+    emotion_confidence: float  # 0-1 confidence score
+
+    # Sentiment analysis
+    sentiment_polarity: float  # -1 to 1 (negative to positive)
+    sentiment_subjectivity: float  # 0 to 1 (objective to subjective)
+
+    # Detected emotions with scores
+    emotion_scores: Dict[str, float]
+
+    # Contextual information
+    detected_emojis: List[str]
+    emotion_keywords: List[str]
+    intensity_level: str  # 'low', 'medium', 'high'
+
+    # Generated artistic attributes
+    color_palette: List[str]
+    artistic_descriptors: List[str]
+    mood_modifiers: List[str]
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary for JSON serialization"""
+        return {
+            'primary_emotion': self.primary_emotion.value,
+            'emotion_confidence': self.emotion_confidence,
+            'sentiment_polarity': self.sentiment_polarity,
+            'sentiment_subjectivity': self.sentiment_subjectivity,
+            'emotion_scores': self.emotion_scores,
+            'detected_emojis': self.detected_emojis,
+            'emotion_keywords': self.emotion_keywords,
+            'intensity_level': self.intensity_level,
+            'color_palette': self.color_palette,
+            'artistic_descriptors': self.artistic_descriptors,
+            'mood_modifiers': self.mood_modifiers
+        }
+
+class EmotionProcessor:
+    """Core emotion processing and analysis functionality"""
+
+    def __init__(self):
+        """Initialize the emotion processor with predefined mappings"""
+
+        # Predefined emotion sets
+        self.preset_emotions = {
+            "joyful": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "😊"},
+            "happy": {"category": EmotionCategory.JOY, "intensity": "medium", "emoji": "😄"},
+            "ecstatic": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "🤩"},
+            "sad": {"category": EmotionCategory.SADNESS, "intensity": "medium", "emoji": "😢"},
+            "melancholic": {"category": EmotionCategory.SADNESS, "intensity": "high", "emoji": "😔"},
+            "depressed": {"category": EmotionCategory.SADNESS, "intensity": "high", "emoji": "😞"},
+            "angry": {"category": EmotionCategory.ANGER, "intensity": "high", "emoji": "😡"},
+            "frustrated": {"category": EmotionCategory.ANGER, "intensity": "medium", "emoji": "😤"},
+            "furious": {"category": EmotionCategory.ANGER, "intensity": "high", "emoji": "🤬"},
+            "fearful": {"category": EmotionCategory.FEAR, "intensity": "high", "emoji": "😱"},
+            "anxious": {"category": EmotionCategory.FEAR, "intensity": "medium", "emoji": "😰"},
+            "nervous": {"category": EmotionCategory.FEAR, "intensity": "low", "emoji": "😬"},
+            "surprised": {"category": EmotionCategory.SURPRISE, "intensity": "medium", "emoji": "😲"},
+            "amazed": {"category": EmotionCategory.SURPRISE, "intensity": "high", "emoji": "🤯"},
+            "romantic": {"category": EmotionCategory.LOVE, "intensity": "high", "emoji": "💖"},
+            "loving": {"category": EmotionCategory.LOVE, "intensity": "medium", "emoji": "❤️"},
+            "peaceful": {"category": EmotionCategory.TRUST, "intensity": "medium", "emoji": "🕊️"},
+            "serene": {"category": EmotionCategory.TRUST, "intensity": "high", "emoji": "🌱"},
+            "mysterious": {"category": EmotionCategory.ANTICIPATION, "intensity": "medium", "emoji": "🕵️♂️"},
+            "nostalgic": {"category": EmotionCategory.SADNESS, "intensity": "medium", "emoji": "🕰️"},
+            "energetic": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "⚡"},
+            "whimsical": {"category": EmotionCategory.JOY, "intensity": "medium", "emoji": "🎠"},
+            "uplifting": {"category": EmotionCategory.JOY, "intensity": "high", "emoji": "🌞"},
+            "dark": {"category": EmotionCategory.SADNESS, "intensity": "high", "emoji": "🌑"},
+            "moody": {"category": EmotionCategory.SADNESS, "intensity": "medium", "emoji": "🌫️"}
+        }
+
+        # Emotion-to-color mappings
+        self.emotion_colors = {
+            EmotionCategory.JOY: ["#FFD700", "#FFA500", "#FF69B4", "#00CED1", "#32CD32"],
+            EmotionCategory.SADNESS: ["#4169E1", "#6495ED", "#708090", "#2F4F4F", "#191970"],
+            EmotionCategory.ANGER: ["#DC143C", "#B22222", "#8B0000", "#FF4500", "#FF6347"],
+            EmotionCategory.FEAR: ["#800080", "#4B0082", "#2E2E2E", "#696969", "#A9A9A9"],
+            EmotionCategory.SURPRISE: ["#FF1493", "#FF69B4", "#FFB6C1", "#FFC0CB", "#FFFF00"],
+            EmotionCategory.LOVE: ["#FF69B4", "#DC143C", "#FF1493", "#C71585", "#DB7093"],
+            EmotionCategory.TRUST: ["#00CED1", "#20B2AA", "#48D1CC", "#40E0D0", "#AFEEEE"],
+            EmotionCategory.ANTICIPATION: ["#9370DB", "#8A2BE2", "#7B68EE", "#6A5ACD", "#483D8B"],
+            EmotionCategory.NEUTRAL: ["#808080", "#A9A9A9", "#C0C0C0", "#D3D3D3", "#DCDCDC"]
+        }
+
+        # Artistic descriptors for each emotion
+        self.artistic_descriptors = {
+            EmotionCategory.JOY: ["vibrant", "luminous", "radiant", "effervescent", "sparkling"],
+            EmotionCategory.SADNESS: ["muted", "somber", "melancholic", "wistful", "contemplative"],
+            EmotionCategory.ANGER: ["intense", "fiery", "bold", "dramatic", "powerful"],
+            EmotionCategory.FEAR: ["shadowy", "mysterious", "ethereal", "haunting", "enigmatic"],
+            EmotionCategory.SURPRISE: ["dynamic", "explosive", "unexpected", "striking", "vivid"],
+            EmotionCategory.LOVE: ["warm", "tender", "passionate", "romantic", "intimate"],
+            EmotionCategory.TRUST: ["serene", "peaceful", "harmonious", "balanced", "tranquil"],
+            EmotionCategory.ANTICIPATION: ["electric", "suspenseful", "charged", "expectant", "tense"],
+            EmotionCategory.NEUTRAL: ["balanced", "calm", "steady", "composed", "neutral"]
+        }
+
+        # Emoji to emotion mapping
+        self.emoji_emotions = {
+            "😊": EmotionCategory.JOY, "😄": EmotionCategory.JOY, "😃": EmotionCategory.JOY,
+            "🤩": EmotionCategory.JOY, "😍": EmotionCategory.LOVE, "🥰": EmotionCategory.LOVE,
+            "😢": EmotionCategory.SADNESS, "😭": EmotionCategory.SADNESS, "😔": EmotionCategory.SADNESS,
+            "😡": EmotionCategory.ANGER, "🤬": EmotionCategory.ANGER, "😤": EmotionCategory.ANGER,
+            "😱": EmotionCategory.FEAR, "😰": EmotionCategory.FEAR, "😨": EmotionCategory.FEAR,
+            "😲": EmotionCategory.SURPRISE, "😮": EmotionCategory.SURPRISE, "🤯": EmotionCategory.SURPRISE,
+            "❤️": EmotionCategory.LOVE, "💖": EmotionCategory.LOVE, "💕": EmotionCategory.LOVE,
+            "🕊️": EmotionCategory.TRUST, "🌱": EmotionCategory.TRUST, "☮️": EmotionCategory.TRUST
+        }
+
+        # Keyword patterns for emotion detection
+        self.emotion_keywords = {
+            EmotionCategory.JOY: ["happy", "joyful", "cheerful", "delighted", "elated", "euphoric", "blissful"],
+            EmotionCategory.SADNESS: ["sad", "depressed", "melancholy", "sorrowful", "gloomy", "dejected"],
+            EmotionCategory.ANGER: ["angry", "furious", "rage", "irritated", "annoyed", "livid", "irate"],
+            EmotionCategory.FEAR: ["afraid", "scared", "terrified", "anxious", "worried", "nervous", "fearful"],
+            EmotionCategory.SURPRISE: ["surprised", "amazed", "astonished", "shocked", "stunned", "bewildered"],
+            EmotionCategory.LOVE: ["love", "romantic", "affectionate", "tender", "passionate", "adoring"],
+            EmotionCategory.TRUST: ["peaceful", "serene", "calm", "tranquil", "secure", "confident"],
+            EmotionCategory.ANTICIPATION: ["excited", "eager", "hopeful", "expectant", "anticipating"]
+        }
+
+    def analyze_emotion(self, text: str, selected_emotion: Optional[str] = None) -> EmotionAnalysis:
+        """
+        Comprehensive emotion analysis of input text
+
+        Args:
+            text: Input text to analyze
+            selected_emotion: Optional pre-selected emotion
+
+        Returns:
+            EmotionAnalysis object with complete analysis
+        """
+        logger.info(f"Analyzing emotion for text: {text[:100]}...")
+
+        # Initialize analysis components
+        detected_emojis = self._extract_emojis(text)
+        emotion_keywords = self._extract_emotion_keywords(text)
+
+        # Determine primary emotion
+        if selected_emotion and selected_emotion.lower() in self.preset_emotions:
+            # Use selected emotion
+            emotion_info = self.preset_emotions[selected_emotion.lower()]
+            primary_emotion = emotion_info["category"]
+            emotion_confidence = 0.9
+            intensity_level = emotion_info["intensity"]
+        else:
+            # Analyze text for emotion
+            primary_emotion, emotion_confidence, intensity_level = self._analyze_text_emotion(text, detected_emojis, emotion_keywords)
+
+        # Sentiment analysis
+        sentiment_polarity, sentiment_subjectivity = self._analyze_sentiment(text)
+
+        # Generate emotion scores
+        emotion_scores = self._generate_emotion_scores(primary_emotion, emotion_confidence)
+
+        # Generate artistic attributes
+        color_palette = self.emotion_colors.get(primary_emotion, self.emotion_colors[EmotionCategory.NEUTRAL])
+        artistic_descriptors = self.artistic_descriptors.get(primary_emotion, ["neutral"])
+        mood_modifiers = self._generate_mood_modifiers(primary_emotion, intensity_level)
+
+        return EmotionAnalysis(
+            primary_emotion=primary_emotion,
+            emotion_confidence=emotion_confidence,
+            sentiment_polarity=sentiment_polarity,
+            sentiment_subjectivity=sentiment_subjectivity,
+            emotion_scores=emotion_scores,
+            detected_emojis=detected_emojis,
+            emotion_keywords=emotion_keywords,
+            intensity_level=intensity_level,
+            color_palette=color_palette[:3],  # Top 3 colors
+            artistic_descriptors=artistic_descriptors[:3],  # Top 3 descriptors
+            mood_modifiers=mood_modifiers
+        )
+
+    def _extract_emojis(self, text: str) -> List[str]:
+        """Extract emojis from text"""
+        if not EMOJI_AVAILABLE:
+            # Simple emoji detection using Unicode ranges
+            emoji_pattern = re.compile(
+                "["
+                "\U0001F600-\U0001F64F"  # emoticons
+                "\U0001F300-\U0001F5FF"  # symbols & pictographs
+                "\U0001F680-\U0001F6FF"  # transport & map symbols
+                "\U0001F1E0-\U0001F1FF"  # flags (iOS)
|
| 238 |
+
"\U00002702-\U000027B0"
|
| 239 |
+
"\U000024C2-\U0001F251"
|
| 240 |
+
"]+",
|
| 241 |
+
flags=re.UNICODE
|
| 242 |
+
)
|
| 243 |
+
return emoji_pattern.findall(text)
|
| 244 |
+
else:
|
| 245 |
+
return [char for char in text if char in emoji.UNICODE_EMOJI['en']]
|
| 246 |
+
|
| 247 |
+
def _extract_emotion_keywords(self, text: str) -> List[str]:
|
| 248 |
+
"""Extract emotion-related keywords from text"""
|
| 249 |
+
text_lower = text.lower()
|
| 250 |
+
found_keywords = []
|
| 251 |
+
|
| 252 |
+
for emotion, keywords in self.emotion_keywords.items():
|
| 253 |
+
for keyword in keywords:
|
| 254 |
+
if keyword in text_lower:
|
| 255 |
+
found_keywords.append(keyword)
|
| 256 |
+
|
| 257 |
+
return found_keywords
|
| 258 |
+
|
| 259 |
+
def _analyze_text_emotion(self, text: str, emojis: List[str], keywords: List[str]) -> Tuple[EmotionCategory, float, str]:
|
| 260 |
+
"""Analyze emotion from text, emojis, and keywords"""
|
| 261 |
+
|
| 262 |
+
# Check emojis first
|
| 263 |
+
for emoji_char in emojis:
|
| 264 |
+
if emoji_char in self.emoji_emotions:
|
| 265 |
+
return self.emoji_emotions[emoji_char], 0.8, "medium"
|
| 266 |
+
|
| 267 |
+
# Check keywords
|
| 268 |
+
emotion_votes = {}
|
| 269 |
+
for keyword in keywords:
|
| 270 |
+
for emotion, emotion_keywords in self.emotion_keywords.items():
|
| 271 |
+
if keyword in emotion_keywords:
|
| 272 |
+
emotion_votes[emotion] = emotion_votes.get(emotion, 0) + 1
|
| 273 |
+
|
| 274 |
+
if emotion_votes:
|
| 275 |
+
primary_emotion = max(emotion_votes, key=emotion_votes.get)
|
| 276 |
+
confidence = min(emotion_votes[primary_emotion] * 0.3, 0.9)
|
| 277 |
+
intensity = "high" if emotion_votes[primary_emotion] > 2 else "medium"
|
| 278 |
+
return primary_emotion, confidence, intensity
|
| 279 |
+
|
| 280 |
+
# Fallback to sentiment analysis
|
| 281 |
+
sentiment_polarity, _ = self._analyze_sentiment(text)
|
| 282 |
+
|
| 283 |
+
if sentiment_polarity > 0.3:
|
| 284 |
+
return EmotionCategory.JOY, 0.6, "medium"
|
| 285 |
+
elif sentiment_polarity < -0.3:
|
| 286 |
+
return EmotionCategory.SADNESS, 0.6, "medium"
|
| 287 |
+
else:
|
| 288 |
+
return EmotionCategory.NEUTRAL, 0.5, "low"
|
| 289 |
+
|
| 290 |
+
def _analyze_sentiment(self, text: str) -> Tuple[float, float]:
|
| 291 |
+
"""Analyze sentiment using TextBlob or fallback method"""
|
| 292 |
+
if not text.strip():
|
| 293 |
+
return 0.0, 0.0
|
| 294 |
+
|
| 295 |
+
if TEXTBLOB_AVAILABLE:
|
| 296 |
+
try:
|
| 297 |
+
blob = TextBlob(text)
|
| 298 |
+
return blob.sentiment.polarity, blob.sentiment.subjectivity
|
| 299 |
+
except Exception as e:
|
| 300 |
+
logger.warning(f"TextBlob sentiment analysis failed: {e}")
|
| 301 |
+
|
| 302 |
+
# Simple fallback sentiment analysis
|
| 303 |
+
positive_words = ["good", "great", "excellent", "amazing", "wonderful", "fantastic", "love", "like", "happy", "joy"]
|
| 304 |
+
negative_words = ["bad", "terrible", "awful", "hate", "dislike", "sad", "angry", "fear", "worried", "depressed"]
|
| 305 |
+
|
| 306 |
+
text_lower = text.lower()
|
| 307 |
+
positive_count = sum(1 for word in positive_words if word in text_lower)
|
| 308 |
+
negative_count = sum(1 for word in negative_words if word in text_lower)
|
| 309 |
+
|
| 310 |
+
total_words = len(text.split())
|
| 311 |
+
if total_words == 0:
|
| 312 |
+
return 0.0, 0.0
|
| 313 |
+
|
| 314 |
+
polarity = (positive_count - negative_count) / max(total_words, 1)
|
| 315 |
+
subjectivity = (positive_count + negative_count) / max(total_words, 1)
|
| 316 |
+
|
| 317 |
+
return max(-1.0, min(1.0, polarity)), max(0.0, min(1.0, subjectivity))
|
| 318 |
+
|
| 319 |
+
def _generate_emotion_scores(self, primary_emotion: EmotionCategory, confidence: float) -> Dict[str, float]:
|
| 320 |
+
"""Generate scores for all emotions"""
|
| 321 |
+
scores = {emotion.value: 0.1 for emotion in EmotionCategory}
|
| 322 |
+
scores[primary_emotion.value] = confidence
|
| 323 |
+
|
| 324 |
+
# Add some secondary emotions based on primary
|
| 325 |
+
secondary_emotions = {
|
| 326 |
+
EmotionCategory.JOY: [EmotionCategory.LOVE, EmotionCategory.TRUST],
|
| 327 |
+
EmotionCategory.SADNESS: [EmotionCategory.FEAR, EmotionCategory.NEUTRAL],
|
| 328 |
+
EmotionCategory.ANGER: [EmotionCategory.DISGUST, EmotionCategory.FEAR],
|
| 329 |
+
EmotionCategory.FEAR: [EmotionCategory.SADNESS, EmotionCategory.SURPRISE],
|
| 330 |
+
EmotionCategory.LOVE: [EmotionCategory.JOY, EmotionCategory.TRUST],
|
| 331 |
+
EmotionCategory.TRUST: [EmotionCategory.JOY, EmotionCategory.LOVE]
|
| 332 |
+
}
|
| 333 |
+
|
| 334 |
+
if primary_emotion in secondary_emotions:
|
| 335 |
+
for secondary in secondary_emotions[primary_emotion]:
|
| 336 |
+
scores[secondary.value] = min(0.4, confidence * 0.5)
|
| 337 |
+
|
| 338 |
+
return scores
|
| 339 |
+
|
| 340 |
+
def _generate_mood_modifiers(self, emotion: EmotionCategory, intensity: str) -> List[str]:
|
| 341 |
+
"""Generate mood modifiers for prompt enhancement"""
|
| 342 |
+
base_modifiers = {
|
| 343 |
+
EmotionCategory.JOY: ["bright", "cheerful", "uplifting", "radiant"],
|
| 344 |
+
EmotionCategory.SADNESS: ["melancholic", "somber", "wistful", "contemplative"],
|
| 345 |
+
EmotionCategory.ANGER: ["intense", "dramatic", "powerful", "bold"],
|
| 346 |
+
EmotionCategory.FEAR: ["mysterious", "dark", "ethereal", "haunting"],
|
| 347 |
+
EmotionCategory.SURPRISE: ["dynamic", "striking", "unexpected", "vivid"],
|
| 348 |
+
EmotionCategory.LOVE: ["romantic", "warm", "tender", "passionate"],
|
| 349 |
+
EmotionCategory.TRUST: ["peaceful", "serene", "harmonious", "tranquil"],
|
| 350 |
+
EmotionCategory.ANTICIPATION: ["electric", "suspenseful", "charged", "expectant"],
|
| 351 |
+
EmotionCategory.NEUTRAL: ["balanced", "calm", "neutral", "composed"]
|
| 352 |
+
}
|
| 353 |
+
|
| 354 |
+
modifiers = base_modifiers.get(emotion, ["neutral"])
|
| 355 |
+
|
| 356 |
+
# Adjust based on intensity
|
| 357 |
+
if intensity == "high":
|
| 358 |
+
intensity_modifiers = ["very", "extremely", "deeply", "intensely"]
|
| 359 |
+
return [f"{intensity_modifiers[0]} {mod}" for mod in modifiers[:2]]
|
| 360 |
+
elif intensity == "low":
|
| 361 |
+
return [f"subtly {mod}" for mod in modifiers[:2]]
|
| 362 |
+
else:
|
| 363 |
+
return modifiers[:3]
|
| 364 |
+
|
| 365 |
+
|
| 366 |
+
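
A minimal usage sketch of the EmotionProcessor above (the input text is hypothetical; because emojis are checked before keywords, this example resolves through the emoji path):

processor = EmotionProcessor()
analysis = processor.analyze_emotion("A joyful sunrise over the mountains 😄")

print(analysis.primary_emotion)   # EmotionCategory.JOY, confidence 0.8 via the 😄 lookup
print(analysis.color_palette)     # top 3 JOY colors: ["#FFD700", "#FFA500", "#FF69B4"]
print(analysis.mood_modifiers)    # ["bright", "cheerful", "uplifting"] at medium intensity
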
class EmotionalPromptEnhancer:
    """Enhance prompts with emotional context."""

    def __init__(self):
        """Initialize the prompt enhancer."""
        self.emotion_processor = EmotionProcessor()

    def enhance_prompt_with_emotion(
        self,
        base_prompt: str,
        style: str,
        emotion_analysis: EmotionAnalysis,
        enhancement_strength: float = 0.7
    ) -> str:
        """
        Enhance a prompt with emotional context.

        Args:
            base_prompt: Original text prompt
            style: Art style
            emotion_analysis: Emotion analysis results
            enhancement_strength: How strongly to apply emotion (0-1)

        Returns:
            Enhanced prompt with emotional context
        """
        enhanced_prompt = base_prompt.strip()

        # Add style
        if style:
            enhanced_prompt += f", {style}"

        # Add emotional descriptors based on strength
        if enhancement_strength > 0.5:
            # Strong emotional enhancement
            descriptors = emotion_analysis.artistic_descriptors[:2]
            mood_modifiers = emotion_analysis.mood_modifiers[:2]

            enhanced_prompt += f", {', '.join(descriptors)}"
            enhanced_prompt += f", with a {', '.join(mood_modifiers)} atmosphere"

            # Add intensity if high
            if emotion_analysis.intensity_level == "high":
                enhanced_prompt += f", deeply {emotion_analysis.primary_emotion.value}"

        elif enhancement_strength > 0.2:
            # Moderate emotional enhancement
            descriptor = emotion_analysis.artistic_descriptors[0]
            mood = emotion_analysis.mood_modifiers[0]

            enhanced_prompt += f", {descriptor}, {mood}"

        else:
            # Subtle emotional enhancement
            if emotion_analysis.artistic_descriptors:
                enhanced_prompt += f", {emotion_analysis.artistic_descriptors[0]}"

        return enhanced_prompt

    def generate_emotion_tags(self, emotion_analysis: EmotionAnalysis) -> List[str]:
        """Generate descriptive tags for the emotion."""
        tags = []

        # Primary emotion
        tags.append(emotion_analysis.primary_emotion.value)

        # Intensity
        tags.append(f"{emotion_analysis.intensity_level}_intensity")

        # Sentiment
        if emotion_analysis.sentiment_polarity > 0.3:
            tags.append("positive_sentiment")
        elif emotion_analysis.sentiment_polarity < -0.3:
            tags.append("negative_sentiment")
        else:
            tags.append("neutral_sentiment")

        # Artistic descriptors
        tags.extend(emotion_analysis.artistic_descriptors[:2])

        return tags
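
An end-to-end sketch tying the two classes together (the prompt, style, and the "uplifting" preset are illustrative; the resulting string assumes EmotionCategory.JOY.value == "joy"):

processor = EmotionProcessor()
enhancer = EmotionalPromptEnhancer()

analysis = processor.analyze_emotion("a quiet harbor at dusk", selected_emotion="uplifting")
prompt = enhancer.enhance_prompt_with_emotion(
    base_prompt="a quiet harbor at dusk",
    style="oil painting",
    emotion_analysis=analysis,
    enhancement_strength=0.7,
)
# Roughly: "a quiet harbor at dusk, oil painting, vibrant, luminous,
#           with a very bright, very cheerful atmosphere, deeply joy"
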
src/utils/file_utils.py
ADDED
@@ -0,0 +1,149 @@
"""
File handling utilities for CompI project.
"""

import json
import yaml
from pathlib import Path
from typing import Dict, Any, Union
from PIL import Image
import soundfile as sf
import numpy as np

from src.config import OUTPUTS_DIR

def save_image(image: Image.Image, filename: str, subfolder: str = "images") -> Path:
    """
    Save a PIL Image to the outputs directory.

    Args:
        image: PIL Image to save
        filename: Name of the file (with extension)
        subfolder: Subfolder within outputs directory

    Returns:
        Path to saved file
    """
    output_dir = OUTPUTS_DIR / subfolder
    output_dir.mkdir(parents=True, exist_ok=True)

    file_path = output_dir / filename
    image.save(file_path)

    return file_path

def save_audio(audio_data: np.ndarray, filename: str,
               sample_rate: int = 22050, subfolder: str = "audio") -> Path:
    """
    Save audio data to the outputs directory.

    Args:
        audio_data: Audio data as numpy array
        filename: Name of the file (with extension)
        sample_rate: Audio sample rate
        subfolder: Subfolder within outputs directory

    Returns:
        Path to saved file
    """
    output_dir = OUTPUTS_DIR / subfolder
    output_dir.mkdir(parents=True, exist_ok=True)

    file_path = output_dir / filename
    sf.write(file_path, audio_data, sample_rate)

    return file_path

def load_config(config_path: Union[str, Path]) -> Dict[str, Any]:
    """
    Load configuration from a JSON or YAML file.

    Args:
        config_path: Path to configuration file

    Returns:
        Configuration dictionary
    """
    config_path = Path(config_path)

    if not config_path.exists():
        raise FileNotFoundError(f"Configuration file not found: {config_path}")

    with open(config_path, 'r') as f:
        if config_path.suffix.lower() in ['.yml', '.yaml']:
            return yaml.safe_load(f)
        elif config_path.suffix.lower() == '.json':
            return json.load(f)
        else:
            raise ValueError(f"Unsupported config file format: {config_path.suffix}")
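
A quick round-trip with load_config for orientation (the file name and keys are illustrative):

# settings.yaml (illustrative contents):
#   model: runwayml/stable-diffusion-v1-5
#   steps: 30
config = load_config("settings.yaml")
print(config["model"], config["steps"])  # -> runwayml/stable-diffusion-v1-5 30
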
def ensure_dir(path: Union[str, Path]) -> Path:
    """
    Ensure a directory exists, creating it if it doesn't.

    Args:
        path: Directory path

    Returns:
        Path object
    """
    path = Path(path)
    path.mkdir(parents=True, exist_ok=True)
    return path

def ensure_directory_exists(path: Union[str, Path]) -> Path:
    """
    Alias for ensure_dir, kept for backward compatibility.

    Args:
        path: Directory path

    Returns:
        Path object
    """
    return ensure_dir(path)

def generate_filename(prompt: str, style: str = "", mood: str = "",
                      seed: int = 0, variation: int = 1,
                      has_audio: bool = False, max_length: int = 100) -> str:
    """
    Generate a descriptive filename for generated images.

    Args:
        prompt: Text prompt used for generation
        style: Art style
        mood: Mood/atmosphere
        seed: Random seed used
        variation: Variation number
        has_audio: Whether audio was used in generation
        max_length: Maximum filename length

    Returns:
        Generated filename (without extension)
    """
    import re
    from datetime import datetime

    # Clean the prompt and truncate it to a short slug
    prompt_clean = re.sub(r'[^\w\s-]', '', prompt.lower())
    prompt_slug = "_".join(prompt_clean.split()[:6])[:30]

    # Clean style and mood
    style_slug = re.sub(r'[^\w]', '', style.lower())[:10] if style else ""
    mood_slug = re.sub(r'[^\w]', '', mood.lower())[:10] if mood else ""

    # Timestamp
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Audio indicator
    audio_tag = "_audio" if has_audio else ""

    # Combine all parts, skipping any that are empty
    parts = [prompt_slug, style_slug, mood_slug, timestamp, f"seed{seed}", f"v{variation}"]
    filename = "_".join(filter(None, parts)) + audio_tag

    # Truncate if too long
    if len(filename) > max_length:
        filename = filename[:max_length]

    return filename
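
An illustrative call to generate_filename (the timestamp component varies per run; note the prompt slug is capped at 30 characters):

name = generate_filename(
    prompt="A misty forest at dawn, cinematic lighting",
    style="digital art",
    mood="serene",
    seed=42,
    variation=1,
    has_audio=True,
)
# -> e.g. "a_misty_forest_at_dawn_cinemat_digitalart_serene_20250101_120000_seed42_v1_audio"
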
src/utils/image_utils.py
ADDED
@@ -0,0 +1,309 @@
"""
Image processing utilities for CompI Phase 2.E: Style Reference/Example Image Integration

This module provides utilities for:
- Image loading from files and URLs
- Image validation and preprocessing
- Style analysis and feature extraction
- Image format conversion and optimization
"""

import os
import io
import requests
import hashlib
from typing import Optional, Tuple, Dict, Any, Union, List
from pathlib import Path
import logging

import torch
import numpy as np
from PIL import Image, ImageStat, ImageFilter
import cv2

from src.utils.logging_utils import setup_logger

logger = setup_logger(__name__)

class ImageProcessor:
    """
    Handles image loading, validation, and preprocessing for style reference.
    """

    def __init__(self, max_size: Tuple[int, int] = (1024, 1024)):
        self.max_size = max_size
        self.supported_formats = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}

    def load_image_from_url(
        self,
        url: str,
        timeout: int = 10,
        max_file_size: int = 10 * 1024 * 1024  # 10 MB
    ) -> Optional[Image.Image]:
        """
        Load an image from a URL with validation and error handling.

        Args:
            url: Image URL
            timeout: Request timeout in seconds
            max_file_size: Maximum file size in bytes

        Returns:
            PIL Image or None if loading failed
        """
        try:
            logger.info(f"Loading image from URL: {url}")

            # Validate URL format
            if not url.startswith(('http://', 'https://')):
                logger.error(f"Invalid URL format: {url}")
                return None

            # Make request with headers to avoid blocking
            headers = {
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
            }

            response = requests.get(url, timeout=timeout, headers=headers, stream=True)
            response.raise_for_status()

            # Check content type
            content_type = response.headers.get('content-type', '').lower()
            if not any(img_type in content_type for img_type in ['image/', 'jpeg', 'png', 'webp']):
                logger.error(f"Invalid content type: {content_type}")
                return None

            # Check declared file size
            content_length = response.headers.get('content-length')
            if content_length and int(content_length) > max_file_size:
                logger.error(f"File too large: {content_length} bytes")
                return None

            # Stream image data, enforcing the size cap during download
            image_data = io.BytesIO()
            downloaded_size = 0

            for chunk in response.iter_content(chunk_size=8192):
                downloaded_size += len(chunk)
                if downloaded_size > max_file_size:
                    logger.error(f"File too large during download: {downloaded_size} bytes")
                    return None
                image_data.write(chunk)

            image_data.seek(0)

            # Open and validate image
            image = Image.open(image_data)
            image = image.convert('RGB')

            logger.info(f"Successfully loaded image: {image.size}")
            return image

        except requests.exceptions.RequestException as e:
            logger.error(f"Request error loading image from {url}: {e}")
            return None
        except Exception as e:
            logger.error(f"Error loading image from {url}: {e}")
            return None

    def load_image_from_file(self, file_path: Union[str, Path]) -> Optional[Image.Image]:
        """
        Load an image from a local file with validation.

        Args:
            file_path: Path to image file

        Returns:
            PIL Image or None if loading failed
        """
        try:
            file_path = Path(file_path)

            if not file_path.exists():
                logger.error(f"File does not exist: {file_path}")
                return None

            if file_path.suffix.lower() not in self.supported_formats:
                logger.error(f"Unsupported format: {file_path.suffix}")
                return None

            image = Image.open(file_path)
            image = image.convert('RGB')

            logger.info(f"Successfully loaded image from file: {image.size}")
            return image

        except Exception as e:
            logger.error(f"Error loading image from {file_path}: {e}")
            return None

    def preprocess_image(
        self,
        image: Image.Image,
        target_size: Optional[Tuple[int, int]] = None,
        maintain_aspect_ratio: bool = True
    ) -> Image.Image:
        """
        Preprocess an image for Stable Diffusion.

        Args:
            image: Input PIL Image
            target_size: Target size (width, height)
            maintain_aspect_ratio: Whether to maintain aspect ratio

        Returns:
            Preprocessed PIL Image
        """
        if target_size is None:
            target_size = (512, 512)  # default SD size

        try:
            if maintain_aspect_ratio:
                # Shrink in place, then letterbox onto a white canvas of the target size
                image.thumbnail(target_size, Image.Resampling.LANCZOS)

                new_image = Image.new('RGB', target_size, (255, 255, 255))
                paste_x = (target_size[0] - image.width) // 2
                paste_y = (target_size[1] - image.height) // 2
                new_image.paste(image, (paste_x, paste_y))
                image = new_image
            else:
                image = image.resize(target_size, Image.Resampling.LANCZOS)

            logger.info(f"Preprocessed image to size: {image.size}")
            return image

        except Exception as e:
            logger.error(f"Error preprocessing image: {e}")
            return image

    def analyze_image_properties(self, image: Image.Image) -> Dict[str, Any]:
        """
        Analyze image properties for style reference.

        Args:
            image: PIL Image to analyze

        Returns:
            Dictionary of image properties
        """
        try:
            # Basic properties
            width, height = image.size
            aspect_ratio = width / height

            # Color analysis
            stat = ImageStat.Stat(image)
            avg_brightness = sum(stat.mean) / len(stat.mean)
            avg_contrast = sum(stat.stddev) / len(stat.stddev)

            # Convert to numpy for additional analysis
            img_array = np.array(image)

            # Color distribution
            r_mean, g_mean, b_mean = np.mean(img_array, axis=(0, 1))
            color_variance = np.var(img_array, axis=(0, 1))

            # Edge detection as a proxy for visual complexity
            gray = cv2.cvtColor(img_array, cv2.COLOR_RGB2GRAY)
            edges = cv2.Canny(gray, 50, 150)
            edge_density = np.sum(edges > 0) / (width * height)

            properties = {
                'dimensions': (width, height),
                'aspect_ratio': aspect_ratio,
                'brightness': avg_brightness,
                'contrast': avg_contrast,
                'color_means': (float(r_mean), float(g_mean), float(b_mean)),
                'color_variance': color_variance.tolist(),
                'edge_density': float(edge_density),
                'file_size_pixels': width * height
            }

            logger.info(f"Analyzed image properties: {properties}")
            return properties

        except Exception as e:
            logger.error(f"Error analyzing image properties: {e}")
            return {}

    def generate_image_hash(self, image: Image.Image) -> str:
        """
        Generate a hash for image deduplication.

        Args:
            image: PIL Image

        Returns:
            MD5 hash string
        """
        try:
            # Serialize the image to bytes in a canonical format
            img_bytes = io.BytesIO()
            image.save(img_bytes, format='PNG')
            img_bytes = img_bytes.getvalue()

            # Generate hash
            hash_md5 = hashlib.md5(img_bytes)
            return hash_md5.hexdigest()

        except Exception as e:
            logger.error(f"Error generating image hash: {e}")
            return ""
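
A short usage sketch of ImageProcessor (the file path is illustrative):

proc = ImageProcessor(max_size=(1024, 1024))

ref = proc.load_image_from_file("outputs/style_refs/reference.jpg")  # illustrative path
if ref is not None:
    ref = proc.preprocess_image(ref, target_size=(512, 512))
    props = proc.analyze_image_properties(ref)   # brightness, contrast, edge density, ...
    digest = proc.generate_image_hash(ref)       # stable MD5 for caching/deduplication
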
class StyleAnalyzer:
    """
    Analyzes style characteristics of reference images.
    """

    def __init__(self):
        self.style_keywords = {
            'realistic': ['photo', 'realistic', 'detailed', 'sharp'],
            'artistic': ['painting', 'artistic', 'brushstrokes', 'canvas'],
            'anime': ['anime', 'manga', 'cartoon', 'stylized'],
            'abstract': ['abstract', 'geometric', 'surreal', 'conceptual'],
            'vintage': ['vintage', 'retro', 'aged', 'classic'],
            'modern': ['modern', 'contemporary', 'clean', 'minimal']
        }

    def suggest_style_keywords(self, image_properties: Dict[str, Any]) -> List[str]:
        """
        Suggest style keywords based on image analysis.

        Args:
            image_properties: Properties from analyze_image_properties

        Returns:
            List of suggested style keywords
        """
        suggestions = []

        try:
            brightness = image_properties.get('brightness', 128)
            contrast = image_properties.get('contrast', 50)
            edge_density = image_properties.get('edge_density', 0.1)

            # Brightness-based suggestions
            if brightness < 100:
                suggestions.extend(['dark', 'moody', 'dramatic'])
            elif brightness > 180:
                suggestions.extend(['bright', 'light', 'airy'])

            # Contrast-based suggestions
            if contrast > 80:
                suggestions.extend(['high contrast', 'bold', 'striking'])
            elif contrast < 30:
                suggestions.extend(['soft', 'gentle', 'muted'])

            # Edge-density-based suggestions
            if edge_density > 0.2:
                suggestions.extend(['detailed', 'complex', 'intricate'])
            elif edge_density < 0.05:
                suggestions.extend(['smooth', 'simple', 'minimalist'])

            return list(set(suggestions))  # remove duplicates

        except Exception as e:
            logger.error(f"Error suggesting style keywords: {e}")
            return []
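
Continuing the ImageProcessor sketch above, the analyzer consumes the properties dictionary directly (thresholds as defined in the code; output order is not guaranteed because of the set() pass):

analyzer = StyleAnalyzer()
keywords = analyzer.suggest_style_keywords(props)
# For a bright (>180), low-contrast (<30) image:
# some ordering of ['bright', 'light', 'airy', 'soft', 'gentle', 'muted']
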
src/utils/logging_utils.py
ADDED
@@ -0,0 +1,45 @@
"""
Logging utilities for CompI project.
"""

import logging
import sys
from pathlib import Path
from typing import Optional

from src.config import LOG_LEVEL, LOG_FORMAT, PROJECT_ROOT

def setup_logger(name: str, log_file: Optional[str] = None) -> logging.Logger:
    """
    Set up a logger with console and optional file output.

    Args:
        name: Logger name
        log_file: Optional log file name (created under the project's logs/ directory)

    Returns:
        Configured logger instance
    """
    logger = logging.getLogger(name)
    logger.setLevel(getattr(logging, LOG_LEVEL.upper()))

    # Clear existing handlers so repeated setup calls don't duplicate output
    logger.handlers.clear()

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(logging.INFO)
    console_formatter = logging.Formatter(LOG_FORMAT)
    console_handler.setFormatter(console_formatter)
    logger.addHandler(console_handler)

    # File handler (optional)
    if log_file:
        log_path = PROJECT_ROOT / "logs"
        log_path.mkdir(exist_ok=True)

        file_handler = logging.FileHandler(log_path / log_file)
        file_handler.setLevel(logging.DEBUG)
        file_formatter = logging.Formatter(LOG_FORMAT)
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

    return logger
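
Typical module-level usage (the log file name is illustrative; note that records below the level set by LOG_LEVEL never reach the handlers, regardless of the file handler's DEBUG level):

logger = setup_logger(__name__, log_file="compi.log")
logger.info("Pipeline initialized")         # stdout and logs/compi.log
logger.debug("Intermediate tensor shapes")  # written to the file only if LOG_LEVEL is DEBUG
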