Spaces:

YuWang0103
/

LGGM-Text2Graph

Runtime error

App Files Files Community

LGGM-Text2Graph / analysis /dist_helper.py

YuWang0103

Upload 41 files

6b59850 verified 10 months ago

raw

history blame contribute delete

5.1 kB

	###############################################################################
	#
	# Adapted from https://github.com/lrjconan/GRAN/ which in turn is adapted from https://github.com/JiaxuanYou/graph-generation
	#
	###############################################################################
	import pyemd
	import numpy as np
	import concurrent.futures
	from functools import partial
	from scipy.linalg import toeplitz


	def emd(x, y, distance_scaling=1.0):
	    """Earth mover's distance between two 1D histograms.

	    NOTE: a second ``emd`` taking an additional ``sigma`` parameter is
	    defined further down in this module and rebinds the name at import time.

	    Args:
	      x, y: 1D histograms exposing ``.astype`` (e.g. numpy arrays); the
	        shorter one is zero-padded to the length of the longer one.
	      distance_scaling: divisor applied to the |i - j| ground distance.

	    Returns:
	      The value returned by ``pyemd.emd``.
	    """
	    len_x, len_y = len(x), len(y)
	    support_size = max(len_x, len_y)
	    # Ground distance between bins i and j is |i - j| / distance_scaling.
	    ground_dist = toeplitz(range(support_size)).astype(float) / distance_scaling

	    # Both histograms must be float and share the same support size.
	    hist_x = x.astype(float)
	    hist_y = y.astype(float)
	    if len_x < support_size:
	        hist_x = np.hstack((hist_x, [0.0] * (support_size - len_x)))
	    if len_y < support_size:
	        hist_y = np.hstack((hist_y, [0.0] * (support_size - len_y)))

	    return pyemd.emd(hist_x, hist_y, ground_dist)



	def l2(x, y):
	    """Return the 2-norm of ``x - y`` as computed by ``np.linalg.norm``."""
	    difference = x - y
	    return np.linalg.norm(difference, 2)


	def emd(x, y, sigma=1.0, distance_scaling=1.0):
	    """Absolute earth mover's distance between two 1D histograms.

	    Args:
	      x, y: 1D pmf of two distributions, exposing ``.astype`` (e.g. numpy
	        arrays); the shorter one is zero-padded to the longer one's length.
	      sigma: accepted so the signature matches the Gaussian kernels; it is
	        not used in the computation.
	      distance_scaling: divisor applied to the |i - j| ground distance.

	    Returns:
	      ``np.abs`` of the value returned by ``pyemd.emd``.
	    """
	    size_x, size_y = len(x), len(y)
	    support_size = max(size_x, size_y)
	    # Ground distance between bins i and j is |i - j| / distance_scaling.
	    bin_offsets = toeplitz(range(support_size)).astype(float)
	    ground_dist = bin_offsets / distance_scaling

	    # Float copies of equal length, padded with zeros on the right.
	    p = x.astype(float)
	    q = y.astype(float)
	    missing = size_y - size_x
	    if missing > 0:
	        p = np.hstack((p, [0.0] * missing))
	    elif missing < 0:
	        q = np.hstack((q, [0.0] * (-missing)))

	    transport_cost = pyemd.emd(p, q, ground_dist)
	    return np.abs(transport_cost)


	def gaussian_emd(x, y, sigma=1.0, distance_scaling=1.0):
	    """Gaussian kernel with the squared distance replaced by squared EMD.

	    Args:
	      x, y: 1D pmf of two distributions, exposing ``.astype`` (e.g. numpy
	        arrays); the shorter one is zero-padded to the longer one's length.
	      sigma: standard deviation of the Gaussian.
	      distance_scaling: divisor applied to the |i - j| ground distance.

	    Returns:
	      ``exp(-emd**2 / (2 * sigma**2))``.
	    """
	    n_x, n_y = len(x), len(y)
	    support_size = max(n_x, n_y)
	    # Ground distance between bins i and j is |i - j| / distance_scaling.
	    cost_matrix = toeplitz(range(support_size)).astype(float) / distance_scaling

	    # Bring both histograms to float and to the common support size.
	    pmf_x = x.astype(float)
	    pmf_y = y.astype(float)
	    if n_x < support_size:
	        pmf_x = np.hstack((pmf_x, [0.0] * (support_size - n_x)))
	    if n_y < support_size:
	        pmf_y = np.hstack((pmf_y, [0.0] * (support_size - n_y)))

	    transport_cost = pyemd.emd(pmf_x, pmf_y, cost_matrix)
	    two_sigma_sq = 2 * sigma * sigma
	    return np.exp(-transport_cost * transport_cost / two_sigma_sq)


	def gaussian(x, y, sigma=1.0):
	    """Gaussian kernel on the 2-norm between two zero-padded histograms.

	    Args:
	      x, y: 1D histograms exposing ``.astype`` (e.g. numpy arrays); the
	        shorter one is zero-padded to the length of the longer one.
	      sigma: standard deviation of the Gaussian.

	    Returns:
	      ``exp(-||x - y||**2 / (2 * sigma**2))``.
	    """
	    n_x, n_y = len(x), len(y)
	    support_size = max(n_x, n_y)

	    # Float copies of equal length, padded with zeros on the right.
	    vec_x = x.astype(float)
	    vec_y = y.astype(float)
	    if n_x != support_size:
	        vec_x = np.hstack((vec_x, [0.0] * (support_size - n_x)))
	    if n_y != support_size:
	        vec_y = np.hstack((vec_y, [0.0] * (support_size - n_y)))

	    dist = np.linalg.norm(vec_x - vec_y, 2)
	    squared = dist * dist
	    return np.exp(-squared / (2 * sigma * sigma))


	def gaussian_tv(x, y, sigma=1.0):
	    """Gaussian kernel on half the L1 distance between two histograms.

	    Args:
	      x, y: 1D histograms exposing ``.astype`` (e.g. numpy arrays); the
	        shorter one is zero-padded to the length of the longer one.
	      sigma: standard deviation of the Gaussian.

	    Returns:
	      ``exp(-tv**2 / (2 * sigma**2))`` with ``tv = sum(|x - y|) / 2``.
	    """
	    size_x, size_y = len(x), len(y)
	    support_size = max(size_x, size_y)

	    # Float copies of equal length, padded with zeros on the right.
	    p = x.astype(float)
	    q = y.astype(float)
	    gap = size_y - size_x
	    if gap > 0:
	        p = np.hstack((p, [0.0] * gap))
	    elif gap < 0:
	        q = np.hstack((q, [0.0] * (-gap)))

	    abs_diff = np.abs(p - q)
	    dist = abs_diff.sum() / 2.0
	    return np.exp(-dist * dist / (2 * sigma * sigma))


	def kernel_parallel_unpacked(x, samples2, kernel):
	    """Return the sum of ``kernel(x, s)`` over every ``s`` in ``samples2``.

	    The accumulator starts at the integer 0, which is also the result for
	    an empty ``samples2``.
	    """
	    total = 0
	    for other in samples2:
	        total += kernel(x, other)
	    return total


	def kernel_parallel_worker(t):
	    """Call ``kernel_parallel_unpacked`` with the items of ``t`` as arguments.

	    Single-argument adapter for ``executor.map``, which hands each work
	    item over as one object.
	    """
	    packed_args = t
	    return kernel_parallel_unpacked(*packed_args)


	def disc(samples1, samples2, kernel, is_parallel=True, *args, **kwargs):
	    """Discrepancy between 2 samples: mean kernel value over all pairs.

	    Args:
	      samples1, samples2: sized, re-iterable collections of samples
	        accepted by ``kernel``.
	      kernel: callable invoked as ``kernel(s1, s2, *args, **kwargs)``.
	      is_parallel: when true, the per-``s1`` row sums are computed on a
	        ``concurrent.futures.ThreadPoolExecutor``.
	      *args, **kwargs: extra arguments forwarded to ``kernel`` after the
	        sample pair, identically in the serial and the parallel path.

	    Returns:
	      The mean of ``kernel`` over every ``(s1, s2)`` pair, or ``1e+6`` when
	      either sample set is empty.
	    """
	    def row_total(s1):
	        # Sum of the kernel between one s1 and every s2. Extras are passed
	        # after the pair so both execution paths call the kernel alike.
	        total = 0
	        for s2 in samples2:
	            total += kernel(s1, s2, *args, **kwargs)
	        return total

	    d = 0
	    if not is_parallel:
	        for s1 in samples1:
	            for s2 in samples2:
	                d += kernel(s1, s2, *args, **kwargs)
	    else:
	        with concurrent.futures.ThreadPoolExecutor() as executor:
	            # executor.map yields results in input order, so the
	            # accumulation order is deterministic.
	            for row_sum in executor.map(row_total, samples1):
	                d += row_sum

	    n_pairs = len(samples1) * len(samples2)
	    if n_pairs > 0:
	        d /= n_pairs
	    else:
	        # No pairs to average over: return a large sentinel discrepancy.
	        d = 1e+6
	    return d


	def compute_mmd(samples1, samples2, kernel, is_hist=True, *args, **kwargs):
	    """MMD between two samples.

	    Computed as ``disc(s1, s1) + disc(s2, s2) - 2 * disc(s1, s2)``.

	    Args:
	      samples1, samples2: collections of samples; with ``is_hist`` each
	        sample must support ``np.sum`` and division by a scalar.
	      kernel: kernel callable handed to ``disc``.
	      is_hist: when true, every sample is first divided by its sum
	        (plus 1e-6) so histograms become pmfs.
	      *args, **kwargs: forwarded unchanged to every ``disc`` call, so
	        ``is_parallel`` and kernel options can be given as keywords.

	    Returns:
	      The MMD estimate as returned by the ``disc`` arithmetic above.
	    """
	    if is_hist:
	        # normalize histograms into pmf; 1e-6 guards against an all-zero
	        # histogram dividing by zero
	        samples1 = [s1 / (np.sum(s1) + 1e-6) for s1 in samples1]
	        samples2 = [s2 / (np.sum(s2) + 1e-6) for s2 in samples2]
	    within_first = disc(samples1, samples1, kernel, *args, **kwargs)
	    within_second = disc(samples2, samples2, kernel, *args, **kwargs)
	    cross = disc(samples1, samples2, kernel, *args, **kwargs)
	    return within_first + within_second - 2 * cross


	def compute_emd(samples1, samples2, kernel, is_hist=True, *args, **kwargs):
	    """EMD between average of two samples.

	    Args:
	      samples1, samples2: collections of samples.
	      kernel: kernel callable handed to ``disc``.
	      is_hist: when true, each sample set is replaced by a one-element
	        list holding ``np.mean`` of the whole set.
	      *args, **kwargs: forwarded unchanged to ``disc``.

	    Returns:
	      A tuple ``(disc_value, [samples1[0], samples2[0]])`` where the second
	      item holds the first element of each (possibly averaged) sample list.
	    """
	    if is_hist:
	        # NOTE(review): np.mean without an axis reduces over every element,
	        # so each sample set collapses to a single scalar here. Confirm
	        # whether a per-bin mean (axis=0) was intended.
	        samples1 = [np.mean(samples1)]
	        samples2 = [np.mean(samples2)]
	    discrepancy = disc(samples1, samples2, kernel, *args, **kwargs)
	    return discrepancy, [samples1[0], samples2[0]]