Spaces:
Configuration error
Configuration error
Upload 3 files
Browse files- README.md +29 -12
- app.py +139 -0
- requirements.txt +8 -0
README.md
CHANGED
@@ -1,12 +1,29 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# AI Product Search Agent
|
2 |
+
|
3 |
+
This is a fully free, deployable AI tool built with Streamlit and LangChain that searches Amazon cellphone accessories using natural language.
|
4 |
+
|
5 |
+
## Features
|
6 |
+
|
7 |
+
- Natural language understanding
|
8 |
+
- Vector search (FAISS + sentence-transformers)
|
9 |
+
- Price and keyword filtering
|
10 |
+
- Conversational agent interface
|
11 |
+
- Deployed on Hugging Face Spaces
|
12 |
+
|
13 |
+
## How to Run
|
14 |
+
|
15 |
+
### Locally
|
16 |
+
|
17 |
+
```bash
|
18 |
+
pip install -r requirements.txt
|
19 |
+
streamlit run app.py
|
20 |
+
```
|
21 |
+
|
22 |
+
### Hugging Face Spaces
|
23 |
+
|
24 |
+
1. Create a new Space (Streamlit)
|
25 |
+
2. Upload all files
|
26 |
+
3. Add a `.streamlit/secrets.toml` containing an `[openai]` section with your `api_key` (OpenAI or OpenRouter) and, when using OpenRouter, a `base_url`
|
27 |
+
4. Deploy!
|
28 |
+
|
29 |
+
Enjoy your free AI agent!
|
app.py
ADDED
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
import faiss
|
5 |
+
import re
|
6 |
+
import ast
|
7 |
+
import os
|
8 |
+
import urllib.request
|
9 |
+
|
10 |
+
from sentence_transformers import SentenceTransformer
|
11 |
+
from sentence_transformers.util import cos_sim
|
12 |
+
from langchain.chat_models import ChatOpenAI
|
13 |
+
from langchain.agents import initialize_agent, AgentType, tool
|
14 |
+
from streamlit_chat import message
|
15 |
+
|
16 |
+
# ---------------------------
|
17 |
+
# Configuration
|
18 |
+
# ---------------------------
|
19 |
+
# Browser-tab title and full-width layout for the whole app.
st.set_page_config(
    page_title="📱 AI Product Search Agent",
    layout="wide",
)
|
20 |
+
|
21 |
+
# ---------------------------
|
22 |
+
# Load model
|
23 |
+
# ---------------------------
|
24 |
+
@st.cache_resource
def load_model():
    """Create the sentence-embedding model used by the search helpers.

    Wrapped in ``st.cache_resource`` so the model object is reused rather
    than rebuilt each time the script body runs.

    Returns:
        A ``SentenceTransformer`` instance for ``all-MiniLM-L6-v2``.
    """
    model_name = "all-MiniLM-L6-v2"
    return SentenceTransformer(model_name)
|
27 |
+
|
28 |
+
# ---------------------------
|
29 |
+
# Load dataset and FAISS index
|
30 |
+
# ---------------------------
|
31 |
+
@st.cache_data
def load_data():
    """Load the product metadata table and the prebuilt FAISS index.

    The metadata is read directly from a parquet shard hosted on Hugging
    Face. The index file is downloaded into the working directory on first
    use and read from disk afterwards.

    Returns:
        tuple: ``(df, index)`` where ``df`` is the metadata ``DataFrame`` and
        ``index`` is the object returned by ``faiss.read_index``.
    """
    # NOTE(review): only shard 00000-of-00007 is loaded; presumably the index
    # labels refer to row positions in this shard -- confirm against how the
    # index was built.
    parquet_url = "https://huggingface.co/datasets/McAuley-Lab/Amazon-Reviews-2023/resolve/main/raw_meta_Cell_Phones_and_Accessories/full-00000-of-00007.parquet"
    df = pd.read_parquet(parquet_url)

    index_url = "https://huggingface.co/GovinKin/MGTA415database/resolve/main/cellphones_index.faiss"
    local_index_path = "cellphones_index.faiss"
    if not os.path.exists(local_index_path):
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer cannot leave a truncated file that would pass
        # the exists() check on every later run.
        partial_path = local_index_path + ".part"
        try:
            urllib.request.urlretrieve(index_url, partial_path)
            os.replace(partial_path, local_index_path)
        finally:
            # Still present only if the download or rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    # NOTE(review): st.cache_data stores a serialized copy of the return
    # value; confirm the FAISS index round-trips, or consider
    # st.cache_resource for it.
    index = faiss.read_index(local_index_path)
    return df, index
|
43 |
+
|
44 |
+
# ---------------------------
|
45 |
+
# Search functions
|
46 |
+
# ---------------------------
|
47 |
+
def search(query, model, df, index, top_k=10):
    """Return the rows of ``df`` nearest to ``query`` in the vector index.

    Args:
        query: Free-text query string.
        model: Object whose ``encode(list_of_str)`` returns an array of
            embeddings.
        df: DataFrame whose row positions are looked up with the labels
            returned by the index.
        index: Object whose ``search(vectors, k=...)`` returns
            ``(distances, labels)`` with one row per query vector.
        top_k: Number of neighbours requested from the index.

    Returns:
        A copy of the matching rows, in the order the index returned them,
        with an added ``distance`` column. May contain fewer than ``top_k``
        rows when the index returns labels that do not map to a row.
    """
    query_vector = np.asarray(model.encode([query]), dtype="float32")
    distances, indices = index.search(query_vector, k=top_k)

    hit_ids = np.asarray(indices[0])
    hit_distances = np.asarray(distances[0])

    # FAISS pads missing neighbours with label -1, which iloc would silently
    # resolve to the last row; labels >= len(df) would raise IndexError.
    # Keep only labels that address a real row.
    valid = (hit_ids >= 0) & (hit_ids < len(df))

    results = df.iloc[hit_ids[valid]].copy()
    results["distance"] = hit_distances[valid]
    return results
|
53 |
+
|
54 |
+
def search_plus(query, model, df, index, top_k=20):
    """Vector search followed by price and keyword filtering.

    Steps:
      1. Fetch ``top_k`` candidates with ``search``.
      2. If the query contains "under N" / "below N" (optional ``$``), keep
         only rows whose ``price`` is numerically below ``N``.
      3. Keep only rows whose ``title`` contains at least one non-stop-word
         keyword from the query (case-insensitive).

    Args:
        query: Free-text query string.
        model, df, index: Passed through to ``search``.
        top_k: Number of candidates fetched before filtering.

    Returns:
        The filtered candidate rows; possibly empty.
    """
    results = search(query, model, df, index, top_k=top_k)
    lowered = query.lower()

    # Accept integer or decimal limits, e.g. "under $30" or "below 29.99".
    price_match = re.search(r"(under|below)\s*\$?(\d+(?:\.\d+)?)", lowered)
    price_under = float(price_match.group(2)) if price_match else None

    if price_under is not None and "price" in results.columns:
        # Coerce row by row: one unparseable price (presumably values such as
        # the string "None" -- verify against the dataset) must not disable
        # the filter for every row. Unparseable prices become NaN and are
        # dropped by the comparison below.
        prices = pd.to_numeric(results["price"], errors="coerce")
        results = results.assign(price=prices)
        results = results[(prices < price_under).to_numpy()]

    stop_words = {"i", "want", "need", "the", "a", "for", "with", "to", "is", "it", "on", "of", "buy", "and", "in"}
    keywords = [kw for kw in lowered.split() if kw not in stop_words and len(kw) > 2]

    if not results.empty and keywords:
        # Escape each keyword so characters like "$" or "+" match literally.
        pattern = '|'.join(map(re.escape, keywords))
        results = results[results["title"].str.lower().str.contains(pattern, na=False)]

    return results
|
75 |
+
|
76 |
+
def rerank_by_similarity(query, results, model, top_n=5):
    """Re-order candidate rows by cosine similarity between query and title.

    Args:
        query: Free-text query string.
        results: DataFrame with a ``title`` column. It is not modified.
        model: Object whose ``encode(..., convert_to_tensor=True)`` returns
            embeddings accepted by ``cos_sim``.
        top_n: Maximum number of rows to return.

    Returns:
        A new DataFrame with an added ``similarity`` column, sorted from most
        to least similar and truncated to ``top_n`` rows. An empty input is
        returned unchanged.
    """
    if results.empty:
        return results

    query_vec = model.encode([query], convert_to_tensor=True)
    titles = results["title"].astype(str).tolist()
    title_vecs = model.encode(titles, convert_to_tensor=True)

    # cos_sim yields a (1, len(titles)) matrix; row 0 holds the scores.
    scores = cos_sim(query_vec, title_vecs)[0].cpu().numpy()

    # assign() builds a new frame, so the caller's DataFrame (often a
    # filtered slice) is not mutated.
    ranked = results.assign(similarity=scores)
    return ranked.sort_values("similarity", ascending=False).head(top_n)
|
85 |
+
|
86 |
+
# ---------------------------
|
87 |
+
# Agent Tool: wraps search_plus
|
88 |
+
# ---------------------------
|
89 |
+
@tool
def product_search_tool(query: str) -> str:
    """Search for cellphone accessories using a natural query."""
    # NOTE: the docstring above is consumed by the @tool decorator
    # (presumably as the tool description shown to the LLM), so it is part of
    # runtime behavior -- do not reword it casually.
    #
    # Relies on the module-level `model`, `df_all` and `index` being defined
    # before the agent first invokes this tool.
    matches = search_plus(query, model, df_all, index, top_k=10)
    if not matches.empty:
        # Report at most five titles, one per line.
        top_titles = matches["title"].head(5).tolist()
        return "\n".join(top_titles)
    return "No results found."
|
96 |
+
|
97 |
+
# ---------------------------
|
98 |
+
# Load all resources
|
99 |
+
# ---------------------------
|
100 |
+
# Module-level resources read by product_search_tool.
model = load_model()
df_all, index = load_data()

# ---------------------------
# Agent setup
# ---------------------------
# Credentials come from Streamlit secrets:
#   [openai]
#   api_key = "..."
#   base_url = "..."   # optional
# Stop with a readable message instead of a raw traceback when they are
# missing.
try:
    openai_secrets = st.secrets["openai"]
    os.environ["OPENAI_API_KEY"] = openai_secrets["api_key"]
except (KeyError, FileNotFoundError):
    st.error(
        "Missing API credentials: add an [openai] section with `api_key` "
        "(and optionally `base_url`) to the app's Streamlit secrets."
    )
    st.stop()

# Fall back to the public OpenAI endpoint when no custom base URL is given.
os.environ["OPENAI_API_BASE"] = openai_secrets.get("base_url", "https://api.openai.com/v1")

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
agent = initialize_agent(
    tools=[product_search_tool],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
|
117 |
+
|
118 |
+
# ---------------------------
|
119 |
+
# Streamlit Chat Interface
|
120 |
+
# ---------------------------
|
121 |
+
st.title("🤖 AI Product Search Agent")
st.markdown("Ask natural questions like 'cheap rugged iPhone case under $30'")

# Conversation log as (role, text) tuples, kept in session state.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

user_input = st.chat_input("Ask about cellphone accessories...")

if user_input:
    st.session_state.chat_history.append(("user", user_input))
    with st.spinner("Agent is thinking..."):
        # Top-level UI boundary: show any agent failure as a chat reply
        # rather than crashing the page.
        try:
            reply = agent.run(user_input)
        except Exception as e:
            reply = f"⚠️ Agent error: {e}"
    st.session_state.chat_history.append(("agent", reply))

# Give every message a unique key: without one, two identical messages in the
# history produce a duplicate-widget error.
for turn, (role, msg) in enumerate(st.session_state.chat_history):
    message(msg, is_user=(role == "user"), key=f"chat_{turn}")
|
requirements.txt
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
streamlit-chat
|
3 |
+
langchain
|
4 |
+
openai
|
5 |
+
sentence-transformers
|
6 |
+
faiss-cpu
|
7 |
+
pandas
|
8 |
+
numpy
|