""" |
Functions which plot confidence ellipses around clusters.
""" |
import matplotlib.pyplot as plt |
from matplotlib.patches import Ellipse |
import matplotlib |
import matplotlib.transforms as transforms |
import numpy as np |
import pandas as pd |
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Draw the covariance confidence ellipse of *x* and *y* onto *ax*.

    The ellipse is first built around the origin with radii derived from the
    Pearson correlation coefficient, then rotated by 45 degrees, scaled by
    ``n_std`` standard deviations along each data axis and translated to the
    sample mean.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Converted with ``np.asarray``, so lists and tuples are
        accepted as well as NumPy arrays.
    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.
    n_std : float
        The number of standard deviations to determine the ellipse's radii.
    facecolor : color
        Face color passed to the ellipse patch; ``'none'`` by default.
    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse
        The value returned by ``ax.add_patch`` for the new ellipse.

    Raises
    ------
    ValueError
        If *x* and *y* do not contain the same number of elements.
    """
    # Plain sequences have no ``.size``; normalise so the documented
    # "array-like" contract (and the size check below) actually holds.
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    # NOTE: if either variance is zero this division is 0/0 and yields NaN.
    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])

    # Radii of the ellipse in the normalised (pre-transform) space.
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                      facecolor=facecolor, **kwargs)

    # Standard deviation of each variable, stretched to n_std deviations.
    scale_x = np.sqrt(cov[0, 0]) * n_std
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_x = np.mean(x)
    mean_y = np.mean(y)

    # Order matters: rotate the normalised ellipse, scale it to data units,
    # then move its centre to the sample mean.
    transf = (
        transforms.Affine2D()
        .rotate_deg(45)
        .scale(scale_x, scale_y)
        .translate(mean_x, mean_y)
    )

    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)
def plot_cluster_ellipses(df, ax=None, color=None, annotation_color=None, color_map=None):
    """
    Draw a 3-sigma confidence ellipse and a text label for every cluster.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``labels`` column (cluster id per row) and the
        coordinate columns ``fx`` and ``fy``.
    ax : matplotlib.axes.Axes, optional
        Axes to draw into. A new 13x13 inch figure is created when omitted.
    color : color, optional
        Edge color used for every ellipse (ignored when *color_map* is truthy).
    annotation_color : color, optional
        Text color of the cluster annotations (ignored when *color_map* is
        truthy).
    color_map : optional
        Only its truthiness is used: when truthy, each cluster's ellipse and
        annotation are colored from the ``'turbo'`` colormap, normalised over
        the range of label values.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn into.

    Notes
    -----
    Rows labelled ``-1`` are skipped (presumably the noise label of the
    clustering algorithm -- confirm against the caller).
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(13, 13))

    labels = df.labels
    # Only the distinct label values are needed; the first-occurrence indices
    # and counts np.unique can also return were never used.
    unique_labels = np.unique(labels)

    cmap = plt.get_cmap('turbo')
    # Series.min()/max() are vectorised, unlike the builtin min()/max() which
    # iterate over the column element by element.
    norm = matplotlib.colors.Normalize(vmin=labels.min(), vmax=labels.max())

    for label in unique_labels:
        if label == -1:
            continue
        if color_map:
            color = cmap(norm(label))
            annotation_color = cmap(norm(label))
        cluster_x_y = df[labels == label][["fx", "fy"]].to_numpy()
        confidence_ellipse(cluster_x_y[:, 0], cluster_x_y[:, 1], ax,
                           edgecolor=color, n_std=3)
        # Place the label at the cluster centroid, shifted by -7 along x.
        ax.annotate(label, cluster_x_y.mean(0) + [-7, 0],
                    color=annotation_color, alpha=1, weight='normal',
                    ha='center', va='center', size=9)
    return ax
def plot_groups(df, column, ax=None, values=None):
    """
    Scatter-plot the rows of *df*, one color per distinct value of *column*.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the coordinate columns ``fx`` and ``fy`` as well as
        *column*.
    column : str
        Name of the column whose values define the groups.
    ax : matplotlib.axes.Axes, optional
        Axes to draw into. A new 13x13 inch figure is created when omitted.
    values : sequence, optional
        The group values to plot, in plotting order. When ``None`` or empty,
        all unique values of *column* are used.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn into.

    Raises
    ------
    IndexError
        If *column* is not a column of *df*.

    Notes
    -----
    Colors come from the ``colorcet`` Glasbey palette and are reused
    cyclically (with a printed notice) when there are more groups than
    palette entries. In the ``labels`` column the value ``-1`` is drawn as
    small black points instead.
    """
    # Validate before doing anything else so that a bad column name neither
    # leaves an empty figure behind nor depends on the optional import below.
    if column not in df.columns:
        raise IndexError(f"Column {column} is not in the dataframe")

    import colorcet as cc

    if ax is None:
        _, ax = plt.subplots(figsize=(13, 13))

    # ``not values`` is ambiguous (raises) for NumPy arrays / pandas Series
    # with more than one element, so test for "not provided" explicitly.
    if values is None or len(values) == 0:
        values = df[column].unique()

    palette = cc.glasbey
    n_colors = len(palette)
    for i, value in enumerate(values):
        indices = df[column] == value
        if (value == -1) and (column == "labels"):
            ax.scatter(df.fx[indices], df.fy[indices], s=1, c="black", label=value)
        else:
            ax.scatter(df.fx[indices], df.fy[indices], s=4,
                       c=palette[i % n_colors], label=value)

    if len(values) > n_colors:
        print(f"Colors used multiple times since number of categories exceeds {n_colors}.")
    return ax