Ishaan Shah
committed on
Commit
·
267e3a7
1
Parent(s):
29af37a
init
Browse files- Dockerfile +9 -0
- README.md +6 -4
- api.py +25 -0
- app.py +46 -0
- model.pkl +3 -0
- requirements.txt +0 -0
- train.py +41 -0
- vectorizer.pkl +3 -0
Dockerfile
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.11-slim

WORKDIR /app

# Copy only the dependency manifest first so the install layer below is
# reused from the build cache when application code changes but
# requirements.txt does not.
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Copy the application code and the serialized model/vectorizer files.
COPY . .

# Serve the FastAPI app object `app` from api.py on all interfaces, port 8000.
CMD ["uvicorn", "api:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
@@ -1,9 +1,11 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: blue
|
6 |
-
sdk:
|
|
|
|
|
7 |
pinned: false
|
8 |
license: mit
|
9 |
---
|
|
|
1 |
---
|
2 |
+
title: Prodrectest
|
3 |
+
emoji: π
|
4 |
+
colorFrom: pink
|
5 |
colorTo: blue
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.35.0
|
8 |
+
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
---
|
api.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
import joblib
|
3 |
+
|
4 |
+
def show_recommendations(product):
    """Predict the cluster for a single product description.

    The text is vectorized with the module-level ``vectorizer`` and the
    result is passed to the module-level ``model``.

    Args:
        product: Product description text.

    Returns:
        The result of ``model.predict`` for the one-row input; index it
        with ``[0]`` to get the predicted cluster label.
    """
    features = vectorizer.transform([product])
    # Return the prediction itself. The original ended with a stray trailing
    # comma, which wrapped the result in a 1-tuple, unlike the same helper
    # in app.py and train.py.
    return model.predict(features)
|
8 |
+
|
9 |
+
def get_cluster_terms(cluster_index):
    """Return the first ten terms in a cluster's centroid ordering.

    Takes row ``cluster_index`` of the module-level ``order_centroids`` and
    maps its first ten entries through the module-level ``terms``.
    """
    top_indices = order_centroids[cluster_index, :10]
    return [terms[position] for position in top_indices]
|
12 |
+
|
13 |
+
# Serialized artifacts loaded once at import time.
model = joblib.load("./model.pkl")
vectorizer = joblib.load("./vectorizer.pkl")

# Vocabulary of the vectorizer, indexed by feature position.
terms = vectorizer.get_feature_names_out()

# For each cluster, feature indices sorted by descending centroid value:
# argsort yields ascending order, so the columns are reversed.
_ascending_order = model.cluster_centers_.argsort()
order_centroids = _ascending_order[:, ::-1]

app = FastAPI()
|
20 |
+
|
21 |
+
@app.post("/inference")
def get_recommendations(product: str):
    """Return the predicted cluster and its leading terms for ``product``.

    The response is a dict with the integer cluster index under
    ``"cluster"`` and the list from ``get_cluster_terms`` under
    ``"top_terms"``.
    """
    predicted = show_recommendations(product)[0]
    cluster_index = int(predicted)
    return {
        "cluster": cluster_index,
        "top_terms": get_cluster_terms(cluster_index),
    }
|
app.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
4 |
+
from sklearn.cluster import KMeans
|
5 |
+
from fastapi import FastAPI
|
6 |
+
import joblib
|
7 |
+
|
8 |
+
def show_recommendations(product):
    """Predict the cluster for one product description.

    Vectorizes ``product`` with the module-level ``vectorizer`` and returns
    what the module-level ``model`` predicts for that single row.
    """
    return model.predict(vectorizer.transform([product]))
|
12 |
+
|
13 |
+
def print_cluster(i):
    """Print the first ten terms of cluster ``i``, one per line.

    Args:
        i: Row index into the module-level ``order_centroids``.

    Returns:
        None; the terms are written to standard output.
    """
    for ind in order_centroids[i, :10]:
        # The trailing comma after print() was a Python 2 idiom for
        # suppressing the newline. In Python 3 it only builds a discarded
        # tuple, so it is dropped.
        print(f" {terms[ind]}")
|
16 |
+
|
17 |
+
def get_cluster_terms(cluster_index):
    """Collect the first ten terms in the given cluster's centroid ordering.

    Reads row ``cluster_index`` of the module-level ``order_centroids`` and
    looks each of its first ten entries up in the module-level ``terms``.
    """
    leading_terms = []
    for feature_index in order_centroids[cluster_index, :10]:
        leading_terms.append(terms[feature_index])
    return leading_terms
|
20 |
+
|
21 |
+
@st.cache_resource
def _load_artifacts():
    """Load the serialized model and vectorizer, cached across reruns."""
    return joblib.load("./model.pkl"), joblib.load("./vectorizer.pkl")


# Streamlit re-executes this script on every widget interaction. Caching the
# loaded objects avoids re-reading both pickle files on each rerun.
model, vectorizer = _load_artifacts()

# For each cluster, feature indices sorted by descending centroid value.
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
terms = vectorizer.get_feature_names_out()

st.title("Product Recommendation System")

# Input for product description
product_input = st.text_input("Enter a product description:", "")

# Button to trigger recommendation
if st.button("Get Recommendations"):
    if product_input:
        # Get cluster for the input product
        cluster_index = show_recommendations(product_input)[0]

        # Display the cluster number
        st.write(f"The product belongs to cluster: {cluster_index}")

        # Display the top terms in the cluster
        cluster_terms = get_cluster_terms(cluster_index)
        st.write("Top terms in this cluster:")
        st.write(", ".join(cluster_terms))
    else:
        st.write("Please enter a product description.")
|
model.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb889cc791652561f2c91b22cee7216ef634479ed86a5a7602de6f21f5f24ad6
|
3 |
+
size 717173
|
requirements.txt
ADDED
Binary file (286 Bytes). View file
|
|
train.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
3 |
+
from sklearn.cluster import KMeans
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Read the training data and drop every row that has a missing value.
product_descriptions = pd.read_csv("./train.csv").dropna()

# TF-IDF features over the "value" column, with English stop words removed.
vectorizer = TfidfVectorizer(stop_words='english')
X1 = vectorizer.fit_transform(product_descriptions["value"])

# Fit k-means with ten clusters; fit() returns the estimator itself.
true_k = 10
model = KMeans(
    n_clusters=true_k,
    init='k-means++',
    max_iter=100,
    n_init=1,
).fit(X1)
|
15 |
+
|
16 |
+
def show_recommendations(product):
    """Predict the cluster for one product description.

    The text is wrapped in a one-element list, vectorized with the
    module-level ``vectorizer`` and passed to ``model.predict``.
    """
    single_row = [product]
    features = vectorizer.transform(single_row)
    return model.predict(features)
|
20 |
+
|
21 |
+
def print_cluster(i):
    """Print the first ten terms of cluster ``i``, one per line.

    Args:
        i: Row index into the module-level ``order_centroids``.

    Returns:
        None; the terms are written to standard output.
    """
    for ind in order_centroids[i, :10]:
        # The trailing comma after print() was a Python 2 idiom for
        # suppressing the newline. In Python 3 it only builds a discarded
        # tuple, so it is dropped.
        print(f" {terms[ind]}")
|
24 |
+
|
25 |
+
def get_cluster_terms(cluster_index):
    """Return the first ten terms in the given cluster's centroid ordering.

    Row ``cluster_index`` of the module-level ``order_centroids`` supplies
    the indices; each is looked up in the module-level ``terms``.
    """
    leading = order_centroids[cluster_index, :10]
    return [terms[idx] for idx in leading]
|
28 |
+
|
29 |
+
# For each cluster, feature indices sorted by descending centroid value:
# argsort yields ascending order, so the columns are reversed.
order_centroids = model.cluster_centers_.argsort()[:, ::-1]
terms = vectorizer.get_feature_names_out()

# Print the predicted cluster's terms for a few sample queries.
# print_cluster() prints the terms itself and returns None, so it is called
# directly. Wrapping it in print() emitted a stray "None" after each cluster.
for sample in ("red dress", "water", "shoes", "cutting tool"):
    print_cluster(show_recommendations(sample)[0])

# Persist the fitted model and vectorizer. The context managers ensure each
# file is closed; the original left both handles open.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)
with open("vectorizer.pkl", "wb") as vectorizer_file:
    pickle.dump(vectorizer, vectorizer_file)
|
39 |
+
|
40 |
+
|
41 |
+
|
vectorizer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7ba96577981c278c57616ebfe977663c2e82e0be3e32282a517a50baaa99b35
|
3 |
+
size 272049
|