Spaces:
Running
Running
import numpy as numpy | |
import pandas as pd | |
import numpy as np | |
import tensorflow_hub as hub | |
from sklearn.cluster import KMeans | |
from sklearn.preprocessing import StandardScaler | |
# TF Hub handle of the Universal Sentence Encoder (version 4) used by embed().
_USE_MODULE_URL = "https://tfhub.dev/google/universal-sentence-encoder/4"

# Lazily populated by embed(); kept at module level so the model is loaded
# only once per process instead of on every call.
_use_model = None


def embed(input):
    """Run the Universal Sentence Encoder on *input* and return its output.

    The model is fetched with ``hub.load`` on the first call and reused on
    every later call, because loading it is far more expensive than running
    it.

    Args:
        input: Passed straight through to the loaded model. Presumably a
            sequence/array of strings -- TODO confirm against callers.

    Returns:
        Whatever the loaded model returns for *input* (not converted here).
    """
    global _use_model
    if _use_model is None:
        _use_model = hub.load(_USE_MODULE_URL)
    return _use_model(input)
def generate_use_embeddings(data):
    """Embed *data* with ``embed`` and return the result as nested lists.

    Args:
        data: Forwarded unchanged to ``embed``.

    Returns:
        The output of ``embed(data)`` converted to a NumPy array and then to
        plain Python lists via ``ndarray.tolist()``.
    """
    encoded = embed(data)
    # Go through NumPy so the caller gets plain Python lists rather than the
    # model's native output type.
    return np.array(encoded).tolist()
def autogenerate_labels(df, n_clusters=4, random_state=42):
    """Assign cluster labels to the rows of *df* from their text embeddings.

    The ``'Map Data'`` column is embedded with ``generate_use_embeddings``,
    the embeddings are standardized, and K-Means is fitted on the
    standardized values. The resulting cluster ids are shifted to start at 1
    and written to a ``'label'`` column.

    Note:
        *df* is modified in place (the ``'label'`` column is added or
        overwritten); the same object is also returned.

    Args:
        df: DataFrame containing a ``'Map Data'`` column.
        n_clusters: Number of K-Means clusters to form. Defaults to 4.
        random_state: Seed passed to ``KMeans``. Defaults to 42.

    Returns:
        A tuple ``(df, df_embeddings)`` where ``df`` is the input frame with
        the ``'label'`` column set, and ``df_embeddings`` is a DataFrame of
        the raw (unscaled) embeddings, one row per input row.

    Raises:
        KeyError: If *df* has no ``'Map Data'`` column.
    """
    map_data = df['Map Data'].to_numpy()
    embeddings_list = generate_use_embeddings(map_data)
    np_embeddings = np.array(embeddings_list)

    # Returned to the caller as-is; clustering below uses the scaled copy.
    df_embeddings = pd.DataFrame(np_embeddings)

    scaler = StandardScaler()
    scaled_embeddings = scaler.fit_transform(np_embeddings)

    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(scaled_embeddings)

    # K-Means ids are 0-based; stored labels are 1-based.
    df['label'] = kmeans.labels_ + 1
    return df, df_embeddings