apamplona2011 committed on
Commit
c2f3c5f
·
verified ·
1 Parent(s): 928619c

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +29 -12
  2. app.py +139 -0
  3. requirements.txt +8 -0
README.md CHANGED
@@ -1,12 +1,29 @@
1
- ---
2
- title: amazoncomparisontool
3
- emoji: 🐳
4
- colorFrom: gray
5
- colorTo: blue
6
- sdk: static
7
- pinned: false
8
- tags:
9
- - deepsite
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI Product Search Agent
2
+
3
+ This is a fully free, deployable AI tool built with Streamlit and LangChain that searches Amazon cellphone accessories using natural language.
4
+
5
+ ## Features
6
+
7
+ - Natural language understanding
8
+ - Vector search (FAISS + sentence-transformers)
9
+ - Price and keyword filtering
10
+ - Conversational agent interface
11
+ - Deployed on Hugging Face Spaces
12
+
13
+ ## How to Run
14
+
15
+ ### Locally
16
+
17
+ ```bash
18
+ pip install -r requirements.txt
19
+ streamlit run app.py
20
+ ```
21
+
22
+ ### Hugging Face Spaces
23
+
24
+ 1. Create a new Space (Streamlit)
25
+ 2. Upload all files
26
+ 3. Add your `secrets.toml` with an `[openai]` section containing `api_key` (an OpenRouter or OpenAI API key) and, optionally, `base_url`
27
+ 4. Deploy!
28
+
29
+ Enjoy your free AI agent!
app.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import faiss
5
+ import re
6
+ import ast
7
+ import os
8
+ import urllib.request
9
+
10
+ from sentence_transformers import SentenceTransformer
11
+ from sentence_transformers.util import cos_sim
12
+ from langchain.chat_models import ChatOpenAI
13
+ from langchain.agents import initialize_agent, AgentType, tool
14
+ from streamlit_chat import message
15
+
16
+ # ---------------------------
17
+ # Configuration
18
+ # ---------------------------
19
# Page-level Streamlit settings: browser-tab title and wide layout.
st.set_page_config(
    page_title="📱 AI Product Search Agent",
    layout="wide",
)
20
+
21
+ # ---------------------------
22
+ # Load model
23
+ # ---------------------------
24
@st.cache_resource
def load_model():
    """Construct the sentence-embedding model used to encode queries and titles.

    Wrapped in ``st.cache_resource`` so the model object is shared instead of
    being rebuilt each time the script calls this function.
    """
    model_name = "all-MiniLM-L6-v2"
    encoder = SentenceTransformer(model_name)
    return encoder
27
+
28
+ # ---------------------------
29
+ # Load dataset and FAISS index
30
+ # ---------------------------
31
# NOTE(review): st.cache_data serializes/copies the cached return value; for a
# large DataFrame plus a FAISS index, st.cache_resource may be a better fit --
# confirm against the Streamlit caching docs before switching.
@st.cache_data
def load_data():
    """Load the product metadata table and the FAISS index.

    The metadata is read directly from a remote parquet file. The FAISS index
    is downloaded once to ``cellphones_index.faiss`` in the working directory
    and read from disk afterwards.

    Returns:
        A ``(df, index)`` tuple: the metadata DataFrame and the FAISS index.
    """
    parquet_url = "https://huggingface.co/datasets/McAuley-Lab/Amazon-Reviews-2023/resolve/main/raw_meta_Cell_Phones_and_Accessories/full-00000-of-00007.parquet"
    df = pd.read_parquet(parquet_url)

    index_url = "https://huggingface.co/GovinKin/MGTA415database/resolve/main/cellphones_index.faiss"
    local_index_path = "cellphones_index.faiss"
    if not os.path.exists(local_index_path):
        # Download to a temporary name and move it into place only on success,
        # so an interrupted download never leaves a truncated file that later
        # runs would mistake for a complete index.
        partial_path = local_index_path + ".part"
        try:
            urllib.request.urlretrieve(index_url, partial_path)
            os.replace(partial_path, local_index_path)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)

    index = faiss.read_index(local_index_path)
    return df, index
43
+
44
+ # ---------------------------
45
+ # Search functions
46
+ # ---------------------------
47
def search(query, model, df, index, top_k=10):
    """Return the rows of *df* nearest to *query* according to *index*.

    Args:
        query: Free-text search string.
        model: Object whose ``encode(list_of_str)`` returns one vector per string.
        df: DataFrame whose row positions correspond to the ids stored in *index*.
        index: Object exposing ``search(vectors, k=...)`` and returning
            ``(distances, ids)`` arrays with one row per query vector.
        top_k: Number of neighbours requested from the index.

    Returns:
        A copy of the matching rows, in index order, with an added
        ``distance`` column. May hold fewer than *top_k* rows.
    """
    query_vector = np.asarray(model.encode([query]), dtype="float32")
    distances, indices = index.search(query_vector, k=top_k)

    hit_ids = np.asarray(indices[0])
    hit_distances = np.asarray(distances[0])

    # FAISS pads missing neighbours with id -1, which ``iloc`` would silently
    # resolve to the last row; ids beyond the table would raise IndexError.
    # Keep only ids that address a real row of ``df``.
    valid = (hit_ids >= 0) & (hit_ids < len(df))

    results = df.iloc[hit_ids[valid]].copy()
    results["distance"] = hit_distances[valid]
    return results
53
+
54
def search_plus(query, model, df, index, top_k=20):
    """Vector search followed by price-cap and title-keyword filtering.

    A phrase such as ``under $30`` or ``below 19.99`` in *query* is read as an
    exclusive upper bound on the ``price`` column. The remaining words (minus
    stop words and words of two characters or fewer) must appear, any one of
    them, in the lower-cased ``title``.

    Args:
        query: Free-text search string.
        model, df, index: Passed through to ``search``.
        top_k: Number of neighbours requested before filtering.

    Returns:
        The filtered DataFrame; may be empty.
    """
    results = search(query, model, df, index, top_k=top_k)
    lowered = query.lower()

    # Accept decimal caps ("under $29.99"), not just whole dollars.
    price_match = re.search(r"(under|below)\s*\$?(\d+(?:\.\d+)?)", lowered)
    if price_match:
        price_under = float(price_match.group(2))
        if "price" in results.columns:
            # Coerce unparseable prices to NaN instead of letting a single bad
            # value abort the conversion and silently disable the whole filter.
            # NaN compares False, so rows without a usable price are dropped.
            prices = pd.to_numeric(results["price"], errors="coerce")
            results = results.assign(price=prices)[prices < price_under]
        # The price phrase is a constraint, not a product keyword: remove it so
        # "under" or "$30" cannot match a title in the keyword filter below.
        lowered = lowered[:price_match.start()] + " " + lowered[price_match.end():]

    stop_words = {"i", "want", "need", "the", "a", "for", "with", "to", "is", "it", "on", "of", "buy", "and", "in"}
    keywords = [kw for kw in lowered.split() if kw not in stop_words and len(kw) > 2]

    if not results.empty and keywords:
        # Keep a row when its title contains at least one keyword.
        pattern = "|".join(map(re.escape, keywords))
        title_hits = results["title"].str.lower().str.contains(pattern, na=False)
        results = results[title_hits]

    return results
75
+
76
def rerank_by_similarity(query, results, model, top_n=5):
    """Order *results* by cosine similarity between *query* and each title.

    Args:
        query: Free-text search string.
        results: DataFrame with a ``title`` column.
        model: Encoder whose ``encode(..., convert_to_tensor=True)`` output is
            accepted by ``cos_sim``.
        top_n: Maximum number of rows to return.

    Returns:
        A new DataFrame holding the *top_n* most similar rows with an added
        ``similarity`` column. An empty *results* is returned unchanged.
    """
    if results.empty:
        return results

    query_vec = model.encode([query], convert_to_tensor=True)
    titles = results["title"].astype(str).tolist()
    title_vecs = model.encode(titles, convert_to_tensor=True)
    scores = cos_sim(query_vec, title_vecs)[0].cpu().numpy()

    # ``assign`` builds a new frame, so the caller's DataFrame (often a
    # filtered slice) is not mutated and no SettingWithCopyWarning is raised.
    ranked = results.assign(similarity=scores)
    return ranked.sort_values("similarity", ascending=False).head(top_n)
85
+
86
+ # ---------------------------
87
+ # Agent Tool: wraps search_plus
88
+ # ---------------------------
89
@tool
def product_search_tool(query: str) -> str:
    """Search for cellphone accessories using a natural query."""
    # The docstring above is left untouched on purpose: the @tool decorator
    # exposes it to the agent as the tool description, so it is runtime text.
    #
    # ``model``, ``df_all`` and ``index`` are module-level globals assigned
    # further down the script; they are resolved when the tool is invoked.
    matches = search_plus(query, model, df_all, index, top_k=10)
    if not matches.empty:
        # Return at most five titles, one per line.
        top_titles = matches["title"].head(5).tolist()
        return "\n".join(top_titles)
    return "No results found."
96
+
97
+ # ---------------------------
98
+ # Load all resources
99
+ # ---------------------------
100
# Module-level resources read by product_search_tool when the agent calls it.
model = load_model()
df_all, index = load_data()

# ---------------------------
# Agent setup
# ---------------------------
# ``os`` is already imported at the top of the file, so no second import here.
# Credentials come from the ``[openai]`` section of the Streamlit secrets;
# ``base_url`` is optional and falls back to the public OpenAI endpoint.
openai_secrets = st.secrets["openai"]
os.environ["OPENAI_API_KEY"] = openai_secrets["api_key"]
os.environ["OPENAI_API_BASE"] = openai_secrets.get("base_url", "https://api.openai.com/v1")

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.3)
agent = initialize_agent(
    tools=[product_search_tool],
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
117
+
118
+ # ---------------------------
119
+ # Streamlit Chat Interface
120
+ # ---------------------------
121
st.title("🤖 AI Product Search Agent")
st.markdown("Ask natural questions like 'cheap rugged iPhone case under $30'")

# The conversation is kept in session state as a list of (role, text) tuples
# so it survives Streamlit's rerun of the script.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

user_input = st.chat_input("Ask about cellphone accessories...")

if user_input:
    st.session_state.chat_history.append(("user", user_input))
    with st.spinner("Agent is thinking..."):
        try:
            reply = agent.run(user_input)
        except Exception as e:
            # Top-level UI boundary: show the failure in the chat instead of
            # crashing the page.
            reply = f"⚠️ Agent error: {e}"
    st.session_state.chat_history.append(("agent", reply))

# Give every bubble its own key: without one, two identical messages in the
# history produce the same component id and Streamlit raises a duplicate-id
# error.
for turn, (role, msg) in enumerate(st.session_state.chat_history):
    message(msg, is_user=(role == "user"), key=f"chat_{turn}")
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ streamlit-chat
3
+ langchain
4
+ openai
5
+ sentence-transformers
6
+ faiss-cpu
7
+ pandas
8
+ numpy