""" This is a simple application for sentence embeddings: clustering Sentences are mapped to sentence embeddings and then k-mean clustering is applied. """ from sentence_transformers import SentenceTransformer from sklearn.cluster import KMeans embedder = SentenceTransformer('all-MiniLM-L6-v2') # Corpus with example sentences corpus = ['A man is eating food.', 'A man is eating a piece of bread.', 'A man is eating pasta.', 'The girl is carrying a baby.', 'The baby is carried by the woman', 'A man is riding a horse.', 'A man is riding a white horse on an enclosed ground.', 'A monkey is playing drums.', 'Someone in a gorilla costume is playing a set of drums.', 'A cheetah is running behind its prey.', 'A cheetah chases prey on across a field.' ] corpus_embeddings = embedder.encode(corpus) # Perform kmean clustering num_clusters = 5 clustering_model = KMeans(n_clusters=num_clusters) clustering_model.fit(corpus_embeddings) cluster_assignment = clustering_model.labels_ clustered_sentences = [[] for i in range(num_clusters)] for sentence_id, cluster_id in enumerate(cluster_assignment): clustered_sentences[cluster_id].append(corpus[sentence_id]) for i, cluster in enumerate(clustered_sentences): print("Cluster ", i+1) print(cluster) print("")