Upload neat\datasets.py with huggingface_hub
Browse files- neat//datasets.py +220 -0
neat//datasets.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Dataset generation functions for testing BackpropNEAT."""
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
import jax.numpy as jnp
|
| 5 |
+
|
| 6 |
+
def generate_xor_data(n_samples: int = 200, complexity: float = 1.0) -> tuple:
    """Generate a clustered, rotated XOR dataset.

    Every quadrant holds three disk-shaped clusters placed progressively
    farther from the origin. Before the global rotation is applied, the
    bottom-left and top-right quadrants are labelled -1 and the top-left and
    bottom-right quadrants are labelled +1.

    Args:
        n_samples: Number of samples per quadrant. The samples are split as
            evenly as possible over the three clusters of a quadrant, so the
            result always has exactly ``4 * n_samples`` rows.
        complexity: Scales the per-cluster rotation (30 degrees per cluster
            index) and the global rotation (45 degrees).

    Returns:
        Tuple of (features, labels): a ``(4 * n_samples, 2)`` JAX array of
        points and a ``(4 * n_samples,)`` JAX array of -1/+1 labels, both
        shuffled with the same permutation.

    Raises:
        ValueError: If ``n_samples`` is negative.
    """
    if n_samples < 0:
        raise ValueError("n_samples must be non-negative")

    n_clusters = 3
    # Plain floor division would silently drop up to two samples per
    # quadrant; hand the remainder out to the first clusters instead.
    base_count, remainder = divmod(n_samples, n_clusters)

    points = []
    labels = []

    for cluster in range(n_clusters):
        samples_per_cluster = base_count + (1 if cluster < remainder else 0)

        # Each subsequent cluster is rotated by a further 30 degrees.
        rotation = complexity * cluster * np.pi / 6

        # Cluster centers move outwards by 0.3 per cluster index, which
        # leaves gaps between the clusters of one quadrant.
        offset = 0.7 + 0.3 * cluster
        centers = [
            # (x, y, radius, label)
            (-offset, -offset, 0.2, -1),  # Bottom-left
            (offset, offset, 0.2, -1),    # Top-right
            (-offset, offset, 0.2, 1),    # Top-left
            (offset, -offset, 0.2, 1),    # Bottom-right
        ]

        for cx, cy, radius, label in centers:
            # Sample points inside a disk around the origin (polar form).
            theta = np.random.uniform(0, 2 * np.pi, samples_per_cluster)
            r = np.random.uniform(0, radius, samples_per_cluster)
            x = r * np.cos(theta)
            y = r * np.sin(theta)

            # Rotate the disk, then move it to its center and add jitter.
            x_rot = x * np.cos(rotation) - y * np.sin(rotation)
            y_rot = x * np.sin(rotation) + y * np.cos(rotation)
            x = cx + x_rot + np.random.normal(0, 0.05, samples_per_cluster)
            y = cy + y_rot + np.random.normal(0, 0.05, samples_per_cluster)

            points.append(np.column_stack([x, y]))
            labels.extend([label] * samples_per_cluster)

    X = np.vstack(points)
    y = np.array(labels, dtype=np.float32)

    # Global rotation of the whole point cloud (45 degrees at complexity 1).
    theta = complexity * np.pi / 4
    rotation_matrix = np.array([
        [np.cos(theta), -np.sin(theta)],
        [np.sin(theta), np.cos(theta)],
    ])
    X = X @ rotation_matrix

    # Shuffle features and labels together.
    perm = np.random.permutation(len(X))
    X = X[perm]
    y = y[perm]

    return jnp.array(X), jnp.array(y)
|
| 76 |
+
|
| 77 |
+
def generate_circle_data(n_samples: int = 1000, noise: float = 0.1) -> tuple:
    """Generate a two-ring (concentric circles) classification dataset.

    Both rings are sampled at the same random angles; only the radii differ.
    The inner ring has mean radius 0.5 and label -1, the outer ring has mean
    radius 1.5 and label +1.

    Args:
        n_samples: Number of samples per class
        noise: Standard deviation of the Gaussian noise added to each radius

    Returns:
        Tuple of (features, labels) as NumPy arrays of shape
        ``(2 * n_samples, 2)`` and ``(2 * n_samples,)``, shuffled together.
    """
    # One shared set of angles, expressed as unit direction vectors.
    angles = np.random.uniform(0, 2*np.pi, n_samples)
    directions = np.column_stack([np.cos(angles), np.sin(angles)])

    def ring(mean_radius, label):
        # Scale each unit direction by its own noisy radius.
        radii = mean_radius + np.random.normal(0, noise, n_samples)
        return directions * radii[:, None], np.full(n_samples, label)

    # Draw order matters for reproducibility: inner radii first, then outer.
    inner_points, inner_labels = ring(0.5, -1.0)
    outer_points, outer_labels = ring(1.5, 1.0)

    features = np.vstack([inner_points, outer_points])
    targets = np.hstack([inner_labels, outer_labels])

    # Apply one permutation to both arrays so rows stay aligned.
    order = np.random.permutation(len(features))
    return features[order], targets[order]
|
| 113 |
+
|
| 114 |
+
def generate_spiral_dataset(n_points=1000, noise=0.1):
    """Generate a two-arm spiral dataset with hand-crafted features.

    For every sampled angle, one point is produced on each of two spiral
    arms (the second arm is the first rotated by pi). Each point is described
    by ten features, in this column order:

        0. x / 2
        1. y / 2
        2. radius / 2
        3. sin(phase)
        4. cos(phase)
        5. tanh(2 * curvature), with curvature = 1 / radius
        6. unwrapped angle / (8 * pi)
        7. tanh(tightness), with tightness = radius / unwrapped angle
        8. tanh(10 * dr/dtheta), a constant because dr/dtheta is fixed
        9. distance to the mirrored point on the other arm / 4

    Args:
        n_points: Number of angles to sample; the result holds
            ``2 * n_points`` rows (one per arm for every angle).
        noise: Upper bound on the standard deviation of the radial noise.
            The effective noise grows with the radius and vanishes at the
            center; the angular noise is a tenth of the radial noise.

    Returns:
        Tuple of (features, labels): a ``(2 * n_points, 10)`` NumPy array and
        a ``(2 * n_points,)`` integer NumPy array with labels -1 (first arm)
        and +1 (second arm). Rows are not shuffled.
    """
    two_pi = 2 * np.pi
    max_theta = 4 * np.pi  # two full revolutions
    eps = 1e-8  # guards the divisions below

    # The square root of a uniform draw has a linearly increasing density,
    # so angles (and therefore radii) are sampled more densely towards the
    # outer turns of the spiral.
    theta = np.sqrt(np.random.uniform(0, 1, n_points)) * max_theta

    # The noise-free spiral is r = theta / (4 * pi), so its radial velocity
    # is the same for every sample.
    dr_dtheta = 1 / max_theta
    radial_velocity = np.tanh(dr_dtheta * 10)

    data = []
    labels = []

    for base_angle in theta:
        # Base radius increases linearly with the angle.
        r_base = base_angle / max_theta

        # Noise scale grows with the radius and is zero at the center.
        noise_scale = noise * (1 - np.exp(-2 * r_base))

        for spiral_idx in range(2):
            # Second arm is the first rotated by pi.
            angle = base_angle + np.pi * spiral_idx

            # Perturb radius first, then angle (angle gets less noise).
            r = r_base + np.random.normal(0, noise_scale)
            angle += np.random.normal(0, noise_scale * 0.1)

            x = r * np.cos(angle)
            y = r * np.sin(angle)

            # Polar coordinates of the perturbed point.
            r_point = np.sqrt(x*x + y*y)
            theta_point = np.arctan2(y, x)

            # arctan2 returns values in (-pi, pi]; wrap into [0, 2*pi) before
            # adding the completed revolutions, otherwise the unwrapped angle
            # jumps by -2*pi in the second half of every revolution.
            theta_unwrapped = (np.mod(theta_point, two_pi)
                               + two_pi * (angle // two_pi))

            # 1. Local curvature proxy.
            curvature = 1 / (r_point + eps)

            # 2. Phase: position within the current revolution, in [0, 1).
            phase = theta_unwrapped % two_pi / two_pi

            # 3. Angular position normalized by the two-revolution range.
            angular_pos = theta_unwrapped / max_theta

            # 4. Tightness: radius gained per unit of unwrapped angle.
            tightness = r_point / (theta_unwrapped + eps)

            # 5. Distance to the same-radius point half a turn away.
            other_angle = angle + np.pi
            other_x = r * np.cos(other_angle)
            other_y = r * np.sin(other_angle)
            dist_to_other = np.sqrt((x - other_x)**2 + (y - other_y)**2)

            # 6. Phase encoded on the unit circle.
            sin_phase = np.sin(phase * 2 * np.pi)
            cos_phase = np.cos(phase * 2 * np.pi)

            features = np.array([
                x / 2.0,
                y / 2.0,
                r_point / 2.0,
                sin_phase,
                cos_phase,
                np.tanh(curvature * 2),
                angular_pos / 2.0,
                np.tanh(tightness),
                radial_velocity,
                dist_to_other / 4.0,
            ])

            data.append(features)
            labels.append(spiral_idx * 2 - 1)  # 0/1 -> -1/+1

    # reshape keeps the (rows, 10) layout even when n_points is 0.
    return np.array(data).reshape(-1, 10), np.array(labels, dtype=int)
|
| 197 |
+
|
| 198 |
+
def generate_checkerboard_data(n_samples: int = 200) -> tuple:
    """Generate checkerboard dataset.

    Points are drawn uniformly from the square [-2, 2) x [-2, 2), which is
    tiled with unit cells. A point is labelled 1 when the sum of its cell
    indices ``floor(x1) + floor(x2)`` is even and 0 otherwise.

    Args:
        n_samples: Half the number of points to draw; ``2 * n_samples``
            points are generated in total. The two classes are not forced
            to be balanced.

    Returns:
        Tuple of (features, labels): JAX arrays of shape
        ``(2 * n_samples, 2)`` and ``(2 * n_samples,)`` with 0/1 labels.
    """
    # Generate random points
    X = np.random.uniform(-2, 2, (n_samples * 2, 2))

    # Cell parity for all rows at once; NumPy's % is non-negative for a
    # positive divisor, so negative cell indices are handled correctly.
    cell_index_sum = np.floor(X).astype(int).sum(axis=1)
    y = (cell_index_sum % 2 == 0).astype(np.float64)

    return jnp.array(X), jnp.array(y)
|
| 217 |
+
|
| 218 |
+
# Export dataset functions
|
| 219 |
+
# Public API of this module: the four dataset generators.
__all__ = [
    'generate_xor_data',
    'generate_circle_data',
    'generate_spiral_dataset',
    'generate_checkerboard_data',
]
|