File size: 975 Bytes
81ebdf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import numpy as numpy
import pandas as pd
import numpy as np
import tensorflow_hub as hub
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


# Loaded encoders are cached per URL so repeated embed() calls reuse the same
# model object instead of invoking hub.load() every time.
_USE_MODULE_URL = "https://tfhub.dev/google/universal-sentence-encoder/4"
_USE_MODEL_CACHE = {}


def _load_use_model(module_url=_USE_MODULE_URL):
    """Return the TF Hub model for *module_url*, loading it on first use only."""
    if module_url not in _USE_MODEL_CACHE:
        _USE_MODEL_CACHE[module_url] = hub.load(module_url)
    return _USE_MODEL_CACHE[module_url]


def embed(input):
    """Encode *input* with the Universal Sentence Encoder v4 from TF Hub.

    The model is loaded once per process (on the first call) and reused on
    later calls.

    Args:
        input: The batch of texts handed directly to the loaded model.
            NOTE(review): presumably a list/array of strings -- confirm
            against callers. The name shadows the builtin ``input`` but is
            kept so existing keyword callers continue to work.

    Returns:
        Whatever the loaded model returns for ``input`` (presumably one
        embedding vector per text -- confirm against the model's docs).
    """
    return _load_use_model()(input)

def generate_use_embeddings(data):
    """Embed *data* via ``embed`` and return the result as plain Python lists.

    Args:
        data: The texts to embed; forwarded unchanged to ``embed``.

    Returns:
        The output of ``embed(data)`` converted through a NumPy array with
        ``ndarray.tolist()``.
    """
    return np.array(embed(data)).tolist()

def autogenerate_labels(df, *, n_clusters=4, random_state=42):
    """Cluster the 'Map Data' column with K-Means and attach 1-based labels.

    Each entry of ``df['Map Data']`` is embedded with
    ``generate_use_embeddings``, the embeddings are standardized with
    ``StandardScaler``, and K-Means is fitted on the standardized values.

    Args:
        df: DataFrame containing a 'Map Data' column. It is modified in
            place: a 'label' column is added (or overwritten).
        n_clusters: Number of K-Means clusters. Defaults to 4, the value
            that was previously hard-coded.
        random_state: Seed passed to ``KMeans``. Defaults to 42, the value
            that was previously hard-coded.

    Returns:
        A tuple ``(df, df_embeddings)``: the same ``df`` object with the
        new 'label' column (cluster index + 1, so labels start at 1), and a
        DataFrame of the unscaled embeddings. ``df_embeddings`` is built
        with a default integer index, not ``df``'s index.

    Raises:
        KeyError: If ``df`` has no 'Map Data' column.
    """
    map_data = df['Map Data'].to_numpy()

    # generate_use_embeddings returns nested lists; rebuild an array for
    # pandas and scikit-learn.
    np_embeddings = np.array(generate_use_embeddings(map_data))
    df_embeddings = pd.DataFrame(np_embeddings)

    scaled_embeddings = StandardScaler().fit_transform(np_embeddings)

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(scaled_embeddings)

    # KMeans labels are 0-based; shift so the stored labels start at 1.
    df['label'] = kmeans.labels_ + 1
    return df, df_embeddings