|
|
|
""" |
|
Functions which plot confidence ellipses around clusters. |
|
""" |
|
|
|
import matplotlib.pyplot as plt |
|
from matplotlib.patches import Ellipse |
|
import matplotlib |
|
import matplotlib.transforms as transforms |
|
import numpy as np |
|
import pandas as pd |
|
|
|
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data. Lists, tuples, numpy arrays and pandas Series are
        all accepted.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radii.

    facecolor : color, default 'none'
        Fill color passed to the ellipse patch.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse

    Raises
    ------
    ValueError
        If *x* and *y* do not hold the same number of elements.
    """
    # The contract is "array-like": convert up front so plain sequences
    # work and the size comparison below is always available.
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])

    # The correlation matrix [[1, p], [p, 1]] has eigenvalues 1 + p and
    # 1 - p with eigenvectors along the two diagonals, so the unit-variance
    # ellipse can be built axis-aligned here and rotated by 45 degrees below.
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                      facecolor=facecolor, **kwargs)

    # Stretch the normalised ellipse back to data units: standard deviation
    # of each coordinate times the requested number of deviations.
    scale_x = np.sqrt(cov[0, 0]) * n_std
    scale_y = np.sqrt(cov[1, 1]) * n_std

    # The ellipse is centred on the sample mean.
    mean_x = np.mean(x)
    mean_y = np.mean(y)

    transf = (
        transforms.Affine2D()
        .rotate_deg(45)
        .scale(scale_x, scale_y)
        .translate(mean_x, mean_y)
    )

    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)
|
|
|
def plot_cluster_ellipses(df, ax=None, color=None, annotation_color=None, color_map=None):
    """
    Draw a confidence ellipse and a text label for every cluster in *df*.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``labels`` column and the coordinate columns ``fx``
        and ``fy``. Rows whose label is ``-1`` are skipped (presumably the
        clusterer's noise label -- confirm against the caller).

    ax : matplotlib.axes.Axes, optional
        Axes to draw into. A new 13x13 figure is created when omitted.

    color : color, optional
        Edge color used for every ellipse.

    annotation_color : color, optional
        Text color used for every cluster label.

    color_map : optional
        Used as a flag: when truthy, each cluster's ellipse and label are
        colored from the 'turbo' colormap according to the label value,
        overriding *color* and *annotation_color*.

    Returns
    -------
    matplotlib.axes.Axes
        The axes that were drawn into. Returned unchanged when *df*
        contains no labels.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(13, 13))

    # np.unique returns the labels sorted, so the first and last entries
    # are the minimum and maximum.
    unique_labels = np.unique(df.labels)
    if unique_labels.size == 0:
        # Nothing to draw; avoids failing on the min/max of an empty frame.
        return ax

    cmap = plt.get_cmap('turbo')
    norm = matplotlib.colors.Normalize(vmin=unique_labels[0], vmax=unique_labels[-1])

    for label in unique_labels:
        if label == -1:
            continue

        if color_map:
            color = annotation_color = cmap(norm(label))

        cluster_x_y = df[df.labels == label][["fx", "fy"]].to_numpy()
        confidence_ellipse(cluster_x_y[:, 0], cluster_x_y[:, 1], ax, edgecolor=color, n_std=3)
        # The label sits at the cluster centroid, shifted 7 units along x.
        ax.annotate(label, cluster_x_y.mean(0) + [-7, 0], color=annotation_color, alpha=1,
                    weight='normal', ha='center', va='center', size=9)
    return ax
|
|
|
def plot_groups(df, column, ax=None, values=None):
    """
    Scatter-plot the points of *df*, one color per distinct value of *column*.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the coordinate columns ``fx`` and ``fy`` as well as
        *column*.

    column : str
        Name of the column whose values define the groups.

    ax : matplotlib.axes.Axes, optional
        Axes to draw into. A new 13x13 figure is created when omitted.

    values : iterable, optional
        The group values to plot, in plotting order. When omitted or empty,
        every unique value of *column* is plotted.

    Returns
    -------
    matplotlib.axes.Axes

    Raises
    ------
    IndexError
        If *column* is not a column of *df*.
    """
    # Validate before importing or creating anything so a bad column name
    # does not leave an empty figure behind.
    if column not in df.columns:
        raise IndexError(f"Column {column} is not in the dataframe")

    import colorcet as cc

    if ax is None:
        _, ax = plt.subplots(figsize=(13, 13))

    # Materialise array-likes (numpy arrays, Series, generators) so the
    # emptiness test below cannot hit numpy's ambiguous-truth-value error.
    # Scalars are left untouched so their handling is unchanged.
    if values is not None and not np.isscalar(values):
        values = list(values)
    if not values:
        values = df[column].unique()

    palette = cc.glasbey
    for i, value in enumerate(values):
        indices = df[column] == value
        if (value == -1) and (column == "labels"):
            # Label -1 is drawn smaller and in black rather than a palette color.
            ax.scatter(df.fx[indices], df.fy[indices], s=1, c="black", label=value)
        else:
            # The palette index wraps around once the groups outnumber it.
            ax.scatter(df.fx[indices], df.fy[indices], s=4, c=palette[i % len(palette)], label=value)

    if len(values) > len(cc.glasbey):
        print(f"Colors used multiple times since number of categories exceeds {len(cc.glasbey)}.")

    return ax
|
|
|
|