Spaces:
Running
Running
File size: 975 Bytes
81ebdf3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
import numpy as numpy
import pandas as pd
import numpy as np
import tensorflow_hub as hub
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# TF Hub handle of the sentence-encoder model used by ``embed``.
_USE_MODULE_URL = "https://tfhub.dev/google/universal-sentence-encoder/4"

# Loaded models keyed by module URL, so each model is loaded at most once
# per process instead of once per ``embed`` call.
_USE_MODEL_CACHE = {}


def _load_use_model(module_url=_USE_MODULE_URL):
    """Return the TF Hub model for ``module_url``, loading it on first use."""
    model = _USE_MODEL_CACHE.get(module_url)
    if model is None:
        model = hub.load(module_url)
        _USE_MODEL_CACHE[module_url] = model
    return model


def embed(input):
    """Run the Universal Sentence Encoder model on ``input``.

    The model is loaded through ``hub.load`` the first time this function is
    called and reused afterwards; previously it was re-loaded on every call.

    Args:
        input: Value passed unchanged to the loaded model. The name shadows
            the ``input`` builtin but is kept so existing keyword callers
            continue to work.

    Returns:
        Whatever the loaded model returns for ``input``.
    """
    return _load_use_model()(input)
def generate_use_embeddings(data):
    """Embed ``data`` and return the result as plain (nested) Python lists.

    Args:
        data: Input forwarded unchanged to ``embed``.

    Returns:
        The output of ``embed(data)`` converted to a NumPy array and then to
        a list via ``ndarray.tolist()``.
    """
    encoded = embed(data)
    # Go through NumPy so the model output ends up as ordinary Python lists.
    return np.array(encoded).tolist()
def autogenerate_labels(df, n_clusters=4, random_state=42):
    """Cluster the ``'Map Data'`` column and attach 1-based cluster labels.

    The column is embedded with ``generate_use_embeddings``, the embeddings
    are standardized with ``StandardScaler``, and ``KMeans`` is fitted on the
    standardized values.

    Args:
        df: DataFrame with a ``'Map Data'`` column. It is modified in place:
            a ``'label'`` column is added (or overwritten).
        n_clusters: Number of K-Means clusters. Defaults to 4, the value that
            was previously hard-coded.
        random_state: Seed passed to ``KMeans``. Defaults to 42, the value
            that was previously hard-coded.

    Returns:
        A tuple ``(df, df_embeddings)`` where ``df`` is the same DataFrame
        object that was passed in, now carrying the ``'label'`` column, and
        ``df_embeddings`` is a DataFrame built from the unscaled embeddings.

    Raises:
        KeyError: If ``df`` has no ``'Map Data'`` column.
    """
    map_data = df['Map Data'].to_numpy()
    np_embeddings = np.array(generate_use_embeddings(map_data))

    # The returned embeddings frame holds the raw values; only the copy fed
    # to K-Means is standardized.
    df_embeddings = pd.DataFrame(np_embeddings)
    scaled_embeddings = StandardScaler().fit_transform(np_embeddings)

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(scaled_embeddings)

    # KMeans labels start at 0; shift so the stored labels start at 1.
    df['label'] = kmeans.labels_ + 1
    return df, df_embeddings