2Dmatters
/

Elf_encoder

Model card Files Files and versions Community

Elf_encoder / src /cluster_plotters.py

thomaswarford's picture

Upload 77 files

90f531c verified 4 months ago

3.57 kB

	#!/usr/bin/env python
	"""
	Functions which plot confidence ellipses around clusters.
	"""

	import matplotlib.pyplot as plt
	from matplotlib.patches import Ellipse
	import matplotlib
	import matplotlib.transforms as transforms
	import numpy as np
	import pandas as pd

	def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
	    """
	    Draw the covariance confidence ellipse of x and y onto ax.

	    Parameters
	    ----------
	    x, y : array-like, shape (n, )
	        Input data. Both are converted with ``np.asarray``, so lists,
	        tuples and pandas Series are accepted as well as arrays.

	    ax : matplotlib.axes.Axes
	        The axes object to draw the ellipse into.

	    n_std : float
	        The number of standard deviations used to size the ellipse's radii.

	    facecolor : color
	        Fill colour of the ellipse; the default ``'none'`` draws only the
	        outline.

	    **kwargs
	        Forwarded to `~matplotlib.patches.Ellipse`

	    Returns
	    -------
	    matplotlib.patches.Ellipse
	        The patch returned by ``ax.add_patch``.

	    Raises
	    ------
	    ValueError
	        If x and y do not contain the same number of elements.
	    """
	    # The contract is "array-like": coerce first so that plain sequences,
	    # which have no ``.size`` attribute, are handled too.
	    x = np.asarray(x)
	    y = np.asarray(y)
	    if x.size != y.size:
	        raise ValueError("x and y must be the same size")

	    cov = np.cov(x, y)
	    pearson = cov[0, 1] / np.sqrt(cov[0, 0] * cov[1, 1])
	    # Floating-point rounding can push the coefficient marginally outside
	    # [-1, 1] for (almost) collinear data, which would turn one of the
	    # square roots below into NaN. NaN inputs pass through unchanged.
	    pearson = np.clip(pearson, -1.0, 1.0)

	    # Using a special case to obtain the eigenvalues of this
	    # two-dimensional dataset (the data are treated as standardised).
	    ell_radius_x = np.sqrt(1 + pearson)
	    ell_radius_y = np.sqrt(1 - pearson)
	    ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
	                      facecolor=facecolor, **kwargs)

	    # Standard deviation of each coordinate (square root of its variance)
	    # multiplied by the requested number of standard deviations.
	    scale_x = np.sqrt(cov[0, 0]) * n_std
	    scale_y = np.sqrt(cov[1, 1]) * n_std
	    mean_x = np.mean(x)
	    mean_y = np.mean(y)

	    # Rotate the unit-space ellipse by 45 degrees, stretch it to the data's
	    # spread and move it onto the data's mean.
	    transf = (
	        transforms.Affine2D()
	        .rotate_deg(45)
	        .scale(scale_x, scale_y)
	        .translate(mean_x, mean_y)
	    )

	    ellipse.set_transform(transf + ax.transData)
	    return ax.add_patch(ellipse)

	def plot_cluster_ellipses(df, ax=None, color=None, annotation_color=None, color_map=None):
	    """
	    Outline every cluster in df with a 3-sigma confidence ellipse.

	    Parameters
	    ----------
	    df : pandas.DataFrame
	        Must provide the columns ``labels``, ``fx`` and ``fy``.

	    ax : matplotlib.axes.Axes, optional
	        Axes to draw into. A new 13x13 inch figure is created when omitted.

	    color : color, optional
	        Edge colour of the ellipses (ignored when color_map is truthy).

	    annotation_color : color, optional
	        Colour of the label text (ignored when color_map is truthy).

	    color_map : optional
	        Only its truthiness is used: when truthy, each cluster's ellipse
	        and text are coloured from the 'turbo' colormap, normalised over
	        the full range of values in ``df.labels``.

	    Returns
	    -------
	    matplotlib.axes.Axes
	        The axes that were drawn into.
	    """
	    if ax is None:
	        _, ax = plt.subplots(figsize=(13, 13))

	    distinct_labels = np.unique(df.labels)
	    palette = plt.get_cmap('turbo')
	    scaler = matplotlib.colors.Normalize(vmin=min(df.labels), vmax=max(df.labels))

	    for label in distinct_labels:
	        # Label -1 never gets an ellipse
	        # (presumably the "noise" label of the clustering step - confirm).
	        if label == -1:
	            continue

	        if color_map:
	            color = annotation_color = palette(scaler(label))

	        members = df[df.labels == label]
	        points = members[["fx", "fy"]].to_numpy()
	        confidence_ellipse(points[:, 0], points[:, 1], ax, edgecolor=color, n_std=3)

	        # Write the label 7 data units to the left of the cluster centroid.
	        text_position = points.mean(0) + [-7, 0]
	        ax.annotate(label, text_position, color=annotation_color, alpha=1,
	                    weight='normal', ha='center', va='center', size=9)

	    return ax

	def plot_groups(df, column, ax=None, values=None):
	    """
	    Scatter-plot the points of df, coloured by the value found in column.

	    Parameters
	    ----------
	    df : pandas.DataFrame
	        Must provide the columns ``fx`` and ``fy`` in addition to column.

	    column : str
	        Name of the column whose values define the groups.

	    ax : matplotlib.axes.Axes, optional
	        Axes to draw into. A new 13x13 inch figure is created when omitted.

	    values : sequence, optional
	        The group values to plot, in plotting order. When ``None`` or
	        empty, every unique value of the column is plotted.

	    Returns
	    -------
	    matplotlib.axes.Axes
	        The axes that were drawn into.

	    Raises
	    ------
	    IndexError
	        If column is not a column of df.
	    """
	    # Validate first, so that a bad column name fails before a figure is
	    # created (and left orphaned) or the optional palette is imported.
	    if column not in df.columns:
	        raise IndexError(f"Column {column} is not in the dataframe")

	    import colorcet as cc

	    if ax is None:
	        _, ax = plt.subplots(figsize=(13, 13))

	    # Explicit None/empty test: the truth value of a multi-element NumPy
	    # array or pandas Series is ambiguous and would raise.
	    if values is None or len(values) == 0:
	        values = df[column].unique()

	    palette = cc.glasbey
	    n_colors = len(palette)
	    column_data = df[column]

	    for i, value in enumerate(values):
	        indices = column_data == value
	        if value == -1 and column == "labels":
	            # The -1 group of the "labels" column is drawn as small black dots.
	            ax.scatter(df.fx[indices], df.fy[indices], s=1, c="black", label=value)
	        else:
	            # Wrap around the palette when there are more groups than colours.
	            ax.scatter(df.fx[indices], df.fy[indices], s=4,
	                       c=palette[i % n_colors], label=value)

	    if len(values) > n_colors:
	        print(f"Colors used multiple times since number of categories exceeds {n_colors}.")

	    return ax