lengocduc195
/

SentenceTransformer

pushNe

2359bda almost 2 years ago

1.39 kB

	"""
	This is a simple application for sentence embeddings: clustering

	Sentences are mapped to sentence embeddings and then k-means clustering is applied.
	"""
	from sentence_transformers import SentenceTransformer
	from sklearn.cluster import KMeans

	# Instantiate the embedding model identified by 'all-MiniLM-L6-v2'.
	embedder = SentenceTransformer('all-MiniLM-L6-v2')

	# Example sentences that will be embedded and then grouped by similarity.
	corpus = [
	    'A man is eating food.',
	    'A man is eating a piece of bread.',
	    'A man is eating pasta.',
	    'The girl is carrying a baby.',
	    'The baby is carried by the woman',
	    'A man is riding a horse.',
	    'A man is riding a white horse on an enclosed ground.',
	    'A monkey is playing drums.',
	    'Someone in a gorilla costume is playing a set of drums.',
	    'A cheetah is running behind its prey.',
	    'A cheetah chases prey on across a field.',
	]

	# Encode the whole corpus in a single call; the result feeds the clustering
	# step further down.
	corpus_embeddings = embedder.encode(corpus)

	# Perform k-means clustering on the sentence embeddings.
	num_clusters = 5
	clustering_model = KMeans(n_clusters=num_clusters)
	clustering_model.fit(corpus_embeddings)
	# After fitting, labels_ holds one cluster index per input row, in the same
	# order as the rows of corpus_embeddings (and therefore as corpus).
	cluster_assignment = clustering_model.labels_

	# Bucket the original sentences by the cluster index each one was assigned.
	clustered_sentences = [[] for _ in range(num_clusters)]
	for sentence, cluster_id in zip(corpus, cluster_assignment):
	    clustered_sentences[cluster_id].append(sentence)

	# Print each cluster with 1-based numbering, followed by a blank line.
	for i, cluster in enumerate(clustered_sentences):
	    print("Cluster ", i+1)
	    print(cluster)
	    print("")