Nasma committed on
Commit
0c6a2fb
·
verified ·
1 Parent(s): d93bd76

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +244 -244
main.py CHANGED
@@ -1,244 +1,244 @@
1
- # main.py
2
- from fastapi import FastAPI
3
- import os
4
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
- from langchain.schema import Document
6
- from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
-
8
- app = FastAPI()
9
-
10
- if "GOOGLE_API_KEY" not in os.environ:
11
- os.environ["GOOGLE_API_KEY"] = "AIzaSyDeyTMR8zf574760YBz6W34m1CcEONsuSE"
12
- embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
13
-
14
-
15
-
16
- docs = [
17
- Document(
18
- page_content="",
19
- metadata={
20
- "firstname": "Pathum",
21
- "lastname": "Lakshan",
22
- "gender": "Male",
23
- "skills": "Spring Boot, Node.js, NestJS, Java, JavaScript, MongoDB, MySQL, Docker, AWS, GCP, Apache Kafka, Redis, REST APIs, CI/CD, JWT, OAuth2, GitHub, Agile Methodologies, Software Architecture, Cybersecurity, DevOps, Web Development",
24
- "industry": "Software Development, IT",
25
- "position": "Associate Software Engineer, Software Developer",
26
- "home_town": "Colombo, Sri Lanka",
27
- "home_address": "Colombo, Sri Lanka",
28
- "experience": "2 years",
29
- "education": "B.Sc. (Hons) Computer Science and Software Engineering",
30
- "years_of_experience": 2,
31
- },
32
- ),
33
- Document(
34
- page_content="",
35
- metadata={
36
- "firstname": "Prasanna",
37
- "lastname": "Ileperuma",
38
- "gender": "Male",
39
- "skills": "Project Management, Problem Solving, Computer Literacy, Creative Design, Adaptability, Communication, Operating Analytical Instruments",
40
- "industry": "Laboratory, Chemical",
41
- "position": "Intern Biyagama Water Treatment Plant, Research Assistant",
42
- "home_town": "Atabage",
43
- "home_address": "245/c, Anugurumulla Lower Division, Wattahena, Atabage, 20500",
44
- "experience": "2 years",
45
- "education": "B.Sc. (Hons) in Chemistry",
46
- "years_of_experience": 1,
47
- },
48
- ),
49
- Document(
50
- page_content="",
51
- metadata={
52
- "firstname": "Indika",
53
- "lastname": "Madushankha",
54
- "gender": "Male",
55
- "skills": "Quality Control, Pharmaceutical Instrumentation, Stability Analysis, Root Cause Analysis, GMP Certification, ISO 9001:2015 Certification, Method Development, Validation, HPLC, GC, UV, FT-IR, Dissolution Tester, Karl Fisher Potentiometer, Analytical Method Development, Problem-Solving, Communication Skills, Networking",
56
- "industry": "Pharmaceutical, Quality Control",
57
- "position": "Senior Quality Executive",
58
- "home_town": "Kadawatha, Sri Lanka",
59
- "home_address": "288, Dalupitiya, Kadawatha, Sri Lanka",
60
- "experience": "7 years",
61
- "education": "B.Sc. (Hons) in Chemistry (Second Class), University of Jaffna; MBA (Ongoing), University of Kelaniya",
62
- "years_of_experience": 7,
63
- },
64
- ),
65
- Document(
66
- page_content="",
67
- metadata={
68
- "firstname": "Chamadhi",
69
- "lastname": "Atapattu Arachchi",
70
- "gender": "Male",
71
- "skills": "Molecular Biology Techniques, Microbiological Analysis, Laboratory Management, Data Analysis, Documentation and Record Keeping, Team Collaboration, Problem Solving, Communication Skills, Adaptability",
72
- "industry": "Biotechnology, Microbiology",
73
- "position": "Quality Assurance and Laboratory Officer - Junior Executive, Laboratory Trainee, Lab Assistant",
74
- "home_town": "Malabe, Sri Lanka",
75
- "home_address": "", # Not specified in the CV
76
- "experience": "Approx. 2 years (based on internships and work experience)",
77
- "education": "B.Sc. Special (Hons) in Biotechnology, Sri Lanka Institute of Information Technology",
78
- "years_of_experience": 2, # Based on provided details of roles and internships
79
- },
80
- ),
81
- Document(
82
- page_content="",
83
- metadata={
84
- "firstname": "Mohamed Naeem",
85
- "lastname": "A. Mubarak",
86
- "gender": "Male",
87
- "skills": "Laboratory Management, Analytical Chemistry, Calibration, Operation and Maintenance of High-end Analytical Instruments, ISO/IEC 17025:2017 Accreditation, Technical Assessment, Quality System Consulting, AMV Training, Research and Development, Problem-solving, Team Leadership, Strategic Planning, Time Management, Technical Reporting",
88
- "industry": "Analytical Chemistry, Laboratory Management",
89
- "position": "Laboratory Director, Principal Research Scientist, Chartered Chemist, Technical Assessor",
90
- "home_town": "Colombo, Sri Lanka",
91
- "home_address": "No: 69/2, Raja Mawatha, Ratmalana, Sri Lanka",
92
- "experience": "25+ years",
93
- "education": "B.Sc. Special (Hons) in Analytical Chemistry, University of Ruhuna; MSc in Integrated Water Resources Management, UNESCO-IHE Institute for Water Education",
94
- "years_of_experience": 25,
95
- },
96
- ),
97
- Document(
98
- page_content="",
99
- metadata={
100
- "firstname": "Virantha",
101
- "lastname": "Dasanayake",
102
- "gender": "Male",
103
- "skills": "Angular, HTML, CSS, Typescript, Data Analytics, Bootstrap, PrimeNG, Flutter, Node.js, Sails.js, C#, ASP.NET Core, JIRA, Azure DevOps, Git, GitHub, GitLab, Bitbucket, Google Cloud Platform, Figma, AdobeXD, MySQL, PostgreSQL, Google Tag Manager, Google Analytics, Firebase",
104
- "industry": "Software Engineering",
105
- "position": "Senior Software Engineer",
106
- "home_town": "Gampaha",
107
- "home_address": "87/D/2, Flower Terrace, Kehelbaddara, Gampaha",
108
- "experience": "Senior Software Engineer at LB Finance (Feb 2023 - Present), Software Engineer at Electrily (Sep 2021 - Feb 2023), Electrical Engineer Intern at KIK Lanka (Sep 2019 - Dec 2019)",
109
- "education": "BSc. Electrical Engineering Honours Degree, University of Moratuwa (2017 - 2021), G.C.E Advanced Level, Bandaranayake College (2012 - 2014)",
110
- "years_of_experience": 3,
111
- },
112
- ),
113
- Document(
114
- page_content="",
115
- metadata={
116
- "firstname": "Geesara",
117
- "lastname": "Siriwardhana",
118
- "gender": "Female",
119
- "skills": "JAVA, Spring Boot, SpringMVC, Google Cloud Platform, JavaScript, jQuery, MySQL, Git, JPA, ScrumMaster, Agile/JIRA, Jenkins, CI/CD, Windows, Linux, SonarQube, Docker, Kubernetes, Microservices",
120
- "industry": "Software Engineering",
121
- "position": "Technical Specialist",
122
- "home_town": "Colombo",
123
- "home_address": "",
124
- "experience": "Technical Specialist at LOLC Technologies Services Limited (October 2023 - Present), Senior Software Engineer at LOLC Technologies Services Limited (May 2022 - September 2023), Software Engineer at LOLC Technologies Services Limited (September 2021 - April 2022), Software Engineer Trainee at LOLC Technologies Services Limited (September 2017 - September 2021)",
125
- "education": "Bachelor of Engineering (BEng) Honors in Software Engineering, University of Westminster, Sri Lanka (2017—2021)",
126
- "years_of_experience": 6,
127
- },
128
- ),
129
- Document(
130
- page_content="",
131
- metadata={
132
- "firstname": "Irosh",
133
- "lastname": "Rupasinghe",
134
- "gender": "male",
135
- "skills": "Java Programming, Problem-solving, Time Management, Communication, Performance Optimization, Scalability Optimization, Agile Methodologies, Spring, Spring Boot, Hibernate, WSO2 Integration Platforms, REST, SOAP, JSON, XML, XSD, XPath, XSLT, NodeJS, Angular, Typescript, Salesforce Development, MongoDB, MySQL, H2, DB2, Oracle, MSSQL, CI/CD Processes, Team Collaboration Platforms",
136
- "industry": "Software Development and Technology",
137
- "position": "Senior Software Engineer",
138
- "home_town": "Colombo",
139
- "home_address": "",
140
- "experience": "Senior Software Engineer at ICP Techno LLC (08/2023 – Present), Tech Lead at Jetwing Travels (11/2018 – 07/2023), Senior Engineer-Technology at Virtusa (01/2017 – 08/2018), Engineer-Technology at Virtusa (07/2015 – 01/2017), Associate Engineer-Technology at Virtusa (11/2014 – 07/2015)",
141
- "education": "MSc Data Science (Reading), Cardiff Metropolitan University - UK (08/2024 – Ongoing), BEng in Software Engineering, IIC University of Technology, Cambodia (08/2018 – 08/2021), BSc in Information Technology (Specialized in Software Engineering), Java Institute, Sri Lanka (08/2011 – 10/2014)",
142
- "years_of_experience": 9,
143
- },
144
- ),
145
- ]
146
-
147
- # Extracting document metadata for embedding
148
- metadata = [str(doc.metadata) for doc in docs] # Assuming doc.metadata is a dictionary or object
149
-
150
- # Generate embeddings for the metadata
151
- doc_vectors = embeddings.embed_documents(metadata)
152
-
153
- # Output the number of documents and the size of one vector
154
- print(f"Number of documents: {len(doc_vectors)}")
155
- print(f"Size of each embedding vector: {len(doc_vectors[0])}")
156
-
157
- import re
158
- from sklearn.metrics.pairwise import cosine_similarity
159
-
160
- # Function to normalize the query
161
- def normalize_query(query):
162
- """Normalize the query string."""
163
- return query.lower().strip()
164
-
165
- # Function to extract role and location from the query
166
- def extract_query_components(query):
167
- """
168
- Extract role and location from the user query.
169
- Assumes the query is of the form 'Need a [position] home town from [hometown]'.
170
- """
171
- match = re.search(r"Need a (.+?) home town from (.+)", query, re.IGNORECASE)
172
- if match:
173
- role = match.group(1).strip()
174
- location = match.group(2).strip()
175
- return role, location
176
- return None, None
177
-
178
- # Function to pre-filter documents based on role and location
179
- def pre_filter_docs(normalized_query, docs):
180
- """
181
- Pre-filter documents based on role and location.
182
- """
183
- role, location = extract_query_components(normalized_query)
184
- if not role or not location:
185
- return docs # If role/location not found, return all docs
186
-
187
- # Filter documents matching the role and location
188
- filtered = [
189
- doc for doc in docs
190
- if role.lower() in doc.metadata.get("position", "").lower()
191
- and location.lower() in doc.metadata.get("home_town", "").lower()
192
- ]
193
- return filtered
194
-
195
- # Example usage
196
- query = "Need a software Engineer home town from Gampaha"
197
-
198
- # Normalize query
199
- normalized_query = normalize_query(query)
200
-
201
- # Embed query
202
- query_vector = embeddings.embed_query(normalized_query)
203
-
204
- # Pre-filter documents
205
- filtered_docs = pre_filter_docs(normalized_query, docs)
206
-
207
- # Re-rank filtered documents
208
- if filtered_docs:
209
- # Filter corresponding document vectors
210
- filtered_doc_vectors = [doc_vector for doc_vector, doc in zip(doc_vectors, docs) if doc in filtered_docs]
211
-
212
- # Compute similarities
213
- similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]
214
-
215
- # Rank documents
216
- ranked_docs = sorted(
217
- zip(similarities, filtered_docs),
218
- key=lambda x: x[0],
219
- reverse=True
220
- )
221
-
222
- print("Top Matches:")
223
- for score, doc in ranked_docs[:3]:
224
- print(f"Score: {score:.4f}, Content: {doc.metadata}")
225
- else:
226
- print("No relevant documents found.")
227
-
228
-
229
- @app.get("/search")
230
- def search(query: str):
231
- query = normalize_query(query)
232
- query_vector = embeddings.embed_query(query)
233
- filtered_docs = pre_filter_docs(query, docs)
234
-
235
- if filtered_docs:
236
- filtered_doc_vectors = [doc_vector for doc_vector, doc in zip(doc_vectors, docs) if doc in filtered_docs]
237
- similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]
238
- ranked_docs = sorted(zip(similarities, filtered_docs), key=lambda x: x[0], reverse=True)
239
- return [{"score": score, "content": doc.metadata} for score, doc in ranked_docs[:3]]
240
- return {"message": "No relevant documents found."}
241
-
242
-
243
-
244
-
 
1
+ # main.py
2
+ from fastapi import FastAPI
3
+ import os
4
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ from langchain.schema import Document
6
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
7
+
8
# FastAPI application object; the /search route is registered on it below.
app = FastAPI()

# NOTE(review): the fallback below embeds a literal API key in source control.
# Behaviour is intentionally unchanged here (the value is only used when the
# environment does not already define GOOGLE_API_KEY), but the key should be
# rotated and supplied exclusively through the environment / a secret store.
os.environ.setdefault(
    "GOOGLE_API_KEY", "AIzaSyDeyTMR8zf574760YBz6W34m1CcEONsuSE"
)

# Embedding client shared by the document indexing step and the query path.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
13
+
14
+
15
+
16
# Candidate profiles. ``page_content`` is empty for every entry; the text that
# gets embedded elsewhere in this file is the string form of ``metadata``.
#
# NOTE(review): some entries look internally inconsistent and should be checked
# against the source CVs -- e.g. "gender" is "male" (lower-case) for one entry
# and "Male" for the others, and one entry has "experience": "2 years" next to
# "years_of_experience": 1.
docs = [
    Document(
        page_content="",
        metadata={
            "firstname": "Pathum",
            "lastname": "Lakshan",
            "gender": "Male",
            "skills": "Spring Boot, Node.js, NestJS, Java, JavaScript, MongoDB, MySQL, Docker, AWS, GCP, Apache Kafka, Redis, REST APIs, CI/CD, JWT, OAuth2, GitHub, Agile Methodologies, Software Architecture, Cybersecurity, DevOps, Web Development",
            "industry": "Software Development, IT",
            "position": "Associate Software Engineer, Software Developer",
            "home_town": "Colombo, Sri Lanka",
            "home_address": "Colombo, Sri Lanka",
            "experience": "2 years",
            "education": "B.Sc. (Hons) Computer Science and Software Engineering",
            "years_of_experience": 2,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Prasanna",
            "lastname": "Ileperuma",
            "gender": "Male",
            "skills": "Project Management, Problem Solving, Computer Literacy, Creative Design, Adaptability, Communication, Operating Analytical Instruments",
            "industry": "Laboratory, Chemical",
            "position": "Intern Biyagama Water Treatment Plant, Research Assistant",
            "home_town": "Atabage",
            "home_address": "245/c, Anugurumulla Lower Division, Wattahena, Atabage, 20500",
            "experience": "2 years",
            "education": "B.Sc. (Hons) in Chemistry",
            "years_of_experience": 1,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Indika",
            "lastname": "Madushankha",
            "gender": "Male",
            "skills": "Quality Control, Pharmaceutical Instrumentation, Stability Analysis, Root Cause Analysis, GMP Certification, ISO 9001:2015 Certification, Method Development, Validation, HPLC, GC, UV, FT-IR, Dissolution Tester, Karl Fisher Potentiometer, Analytical Method Development, Problem-Solving, Communication Skills, Networking",
            "industry": "Pharmaceutical, Quality Control",
            "position": "Senior Quality Executive",
            "home_town": "Kadawatha, Sri Lanka",
            "home_address": "288, Dalupitiya, Kadawatha, Sri Lanka",
            "experience": "7 years",
            "education": "B.Sc. (Hons) in Chemistry (Second Class), University of Jaffna; MBA (Ongoing), University of Kelaniya",
            "years_of_experience": 7,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Chamadhi",
            "lastname": "Atapattu Arachchi",
            "gender": "Male",
            "skills": "Molecular Biology Techniques, Microbiological Analysis, Laboratory Management, Data Analysis, Documentation and Record Keeping, Team Collaboration, Problem Solving, Communication Skills, Adaptability",
            "industry": "Biotechnology, Microbiology",
            "position": "Quality Assurance and Laboratory Officer - Junior Executive, Laboratory Trainee, Lab Assistant",
            "home_town": "Malabe, Sri Lanka",
            "home_address": "",  # Not specified in the CV
            "experience": "Approx. 2 years (based on internships and work experience)",
            "education": "B.Sc. Special (Hons) in Biotechnology, Sri Lanka Institute of Information Technology",
            "years_of_experience": 2,  # Based on provided details of roles and internships
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Mohamed Naeem",
            "lastname": "A. Mubarak",
            "gender": "Male",
            "skills": "Laboratory Management, Analytical Chemistry, Calibration, Operation and Maintenance of High-end Analytical Instruments, ISO/IEC 17025:2017 Accreditation, Technical Assessment, Quality System Consulting, AMV Training, Research and Development, Problem-solving, Team Leadership, Strategic Planning, Time Management, Technical Reporting",
            "industry": "Analytical Chemistry, Laboratory Management",
            "position": "Laboratory Director, Principal Research Scientist, Chartered Chemist, Technical Assessor",
            "home_town": "Colombo, Sri Lanka",
            "home_address": "No: 69/2, Raja Mawatha, Ratmalana, Sri Lanka",
            "experience": "25+ years",
            "education": "B.Sc. Special (Hons) in Analytical Chemistry, University of Ruhuna; MSc in Integrated Water Resources Management, UNESCO-IHE Institute for Water Education",
            "years_of_experience": 25,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Virantha",
            "lastname": "Dasanayake",
            "gender": "Male",
            "skills": "Angular, HTML, CSS, Typescript, Data Analytics, Bootstrap, PrimeNG, Flutter, Node.js, Sails.js, C#, ASP.NET Core, JIRA, Azure DevOps, Git, GitHub, GitLab, Bitbucket, Google Cloud Platform, Figma, AdobeXD, MySQL, PostgreSQL, Google Tag Manager, Google Analytics, Firebase",
            "industry": "Software Engineering",
            "position": "Senior Software Engineer",
            "home_town": "Gampaha",
            "home_address": "87/D/2, Flower Terrace, Kehelbaddara, Gampaha",
            "experience": "Senior Software Engineer at LB Finance (Feb 2023 - Present), Software Engineer at Electrily (Sep 2021 - Feb 2023), Electrical Engineer Intern at KIK Lanka (Sep 2019 - Dec 2019)",
            "education": "BSc. Electrical Engineering Honours Degree, University of Moratuwa (2017 - 2021), G.C.E Advanced Level, Bandaranayake College (2012 - 2014)",
            "years_of_experience": 3,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Geesara",
            "lastname": "Siriwardhana",
            "gender": "Female",
            "skills": "JAVA, Spring Boot, SpringMVC, Google Cloud Platform, JavaScript, jQuery, MySQL, Git, JPA, ScrumMaster, Agile/JIRA, Jenkins, CI/CD, Windows, Linux, SonarQube, Docker, Kubernetes, Microservices",
            "industry": "Software Engineering",
            "position": "Technical Specialist",
            "home_town": "Colombo",
            "home_address": "",
            "experience": "Technical Specialist at LOLC Technologies Services Limited (October 2023 - Present), Senior Software Engineer at LOLC Technologies Services Limited (May 2022 - September 2023), Software Engineer at LOLC Technologies Services Limited (September 2021 - April 2022), Software Engineer Trainee at LOLC Technologies Services Limited (September 2017 - September 2021)",
            # The dash in the year range was mis-encoded ("β€”"); restored to an em dash.
            "education": "Bachelor of Engineering (BEng) Honors in Software Engineering, University of Westminster, Sri Lanka (2017—2021)",
            "years_of_experience": 6,
        },
    ),
    Document(
        page_content="",
        metadata={
            "firstname": "Irosh",
            "lastname": "Rupasinghe",
            "gender": "male",
            "skills": "Java Programming, Problem-solving, Time Management, Communication, Performance Optimization, Scalability Optimization, Agile Methodologies, Spring, Spring Boot, Hibernate, WSO2 Integration Platforms, REST, SOAP, JSON, XML, XSD, XPath, XSLT, NodeJS, Angular, Typescript, Salesforce Development, MongoDB, MySQL, H2, DB2, Oracle, MSSQL, CI/CD Processes, Team Collaboration Platforms",
            "industry": "Software Development and Technology",
            "position": "Senior Software Engineer",
            "home_town": "Colombo",
            "home_address": "",
            "experience": "Senior Software Engineer at ICP Techno LLC (08/2023 – Present), Tech Lead at Jetwing Travels (11/2018 – 07/2023), Senior Engineer-Technology at Virtusa (01/2017 – 08/2018), Engineer-Technology at Virtusa (07/2015 – 01/2017), Associate Engineer-Technology at Virtusa (11/2014 – 07/2015)",
            "education": "MSc Data Science (Reading), Cardiff Metropolitan University - UK (08/2024 – Ongoing), BEng in Software Engineering, IIC University of Technology, Cambodia (08/2018 – 08/2021), BSc in Information Technology (Specialized in Software Engineering), Java Institute, Sri Lanka (08/2011 – 10/2014)",
            "years_of_experience": 9,
        },
    ),
]
146
+
147
# Text that represents each candidate for embedding: the string form of the
# document's metadata dict (page_content is empty for every entry in `docs`).
metadata = [str(entry.metadata) for entry in docs]

# One vector per metadata string; later code zips these with `docs`, so the
# returned order is assumed to match the input order.
doc_vectors = embeddings.embed_documents(metadata)

# Report how many vectors came back and the length of the first one.
print(f"Number of documents: {len(doc_vectors)}")
print(f"Size of each embedding vector: {len(doc_vectors[0])}")
156
+
157
+ import re
158
+ from sklearn.metrics.pairwise import cosine_similarity
159
+
160
+ # Function to normalize the query
161
def normalize_query(query):
    """Return *query* lower-cased with whitespace trimmed and collapsed.

    Leading/trailing whitespace is removed and every internal run of
    whitespace (spaces, tabs, newlines) becomes a single space, so that
    pattern matching on the result is not defeated by stray double spaces.

    Args:
        query: Raw query text.

    Returns:
        The normalized query string (empty if *query* is blank).
    """
    # str.split() with no argument both trims the ends and splits on any
    # whitespace run, which makes the re-join a one-step normalization.
    return " ".join(query.lower().split())
164
+
165
+ # Function to extract role and location from the query
166
# Matches "need a|an <role> home town|hometown from <location>", tolerating
# any amount of whitespace between the words.
_QUERY_PATTERN = re.compile(
    r"need\s+an?\s+(.+?)\s+home\s*town\s+from\s+(.+)",
    re.IGNORECASE,
)


def extract_query_components(query):
    """Extract the role and location from a user query.

    The query is expected to contain a phrase of the form
    ``Need a [position] home town from [hometown]``. Matching is
    case-insensitive, accepts "a" or "an", "home town" or "hometown",
    and any amount of whitespace between words.

    Args:
        query: Query text (normalized or raw).

    Returns:
        A ``(role, location)`` tuple of stripped strings, or
        ``(None, None)`` when the query does not contain the phrase.
    """
    match = _QUERY_PATTERN.search(query)
    if match is None:
        return None, None

    role = match.group(1).strip()
    # Trailing sentence punctuation ("... from Gampaha?") is not part of the
    # place name and would otherwise prevent a substring match later on.
    location = match.group(2).strip().rstrip(".?!,;").rstrip()
    return role, location
177
+
178
+ # Function to pre-filter documents based on role and location
179
def pre_filter_docs(normalized_query, docs):
    """Narrow *docs* to those matching the role and location in the query.

    Args:
        normalized_query: Query text; it is parsed with
            ``extract_query_components``.
        docs: Iterable of objects exposing a ``metadata`` dict with
            ``"position"`` and ``"home_town"`` entries.

    Returns:
        *docs* itself when no role or location could be extracted from the
        query; otherwise a new list holding the documents whose position
        contains the role and whose home town contains the location
        (case-insensitive substring match), in their original order.
    """
    role, location = extract_query_components(normalized_query)
    if not role or not location:
        return docs  # Nothing to filter on: keep every candidate.

    # Lower-case the search terms once instead of once per document.
    role_key = role.lower()
    location_key = location.lower()

    def _field(doc, key):
        # ``or ""`` also covers keys that are present but set to None, which
        # would otherwise raise AttributeError on ``.lower()``.
        return str(doc.metadata.get(key) or "").lower()

    return [
        doc
        for doc in docs
        if role_key in _field(doc, "position")
        and location_key in _field(doc, "home_town")
    ]
194
+
195
# Example usage: these top-level statements run when the module is loaded and
# print the ranked matches for one sample query.
query = "Need a software Engineer home town from Gampaha"

# Normalize the query, embed it, and narrow the candidate list.
normalized_query = normalize_query(query)
query_vector = embeddings.embed_query(normalized_query)
filtered_docs = pre_filter_docs(normalized_query, docs)

if not filtered_docs:
    print("No relevant documents found.")
else:
    # Keep only the vectors whose document survived the pre-filter.
    filtered_doc_vectors = [
        vector
        for vector, candidate in zip(doc_vectors, docs)
        if candidate in filtered_docs
    ]

    # Similarity of the query against each remaining document.
    similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]

    # Highest similarity first.
    ranked_docs = sorted(
        zip(similarities, filtered_docs),
        key=lambda pair: pair[0],
        reverse=True,
    )

    print("Top Matches:")
    for score, doc in ranked_docs:
        print(f"Score: {score:.4f}, Content: {doc.metadata}")
227
+
228
+
229
@app.get("/search")
def search(query: str):
    """Return the pre-filtered candidates ranked by similarity to *query*.

    The query is normalized, candidates are narrowed with
    ``pre_filter_docs``, and the survivors are ordered by cosine similarity
    between the query embedding and each document's precomputed embedding.

    Args:
        query: Free-text search query supplied as a query-string parameter.

    Returns:
        A list of ``{"score": float, "content": metadata}`` dicts, highest
        score first, or ``{"message": "No relevant documents found."}`` when
        no document passes the pre-filter.
    """
    query = normalize_query(query)
    filtered_docs = pre_filter_docs(query, docs)

    if not filtered_docs:
        # Nothing to rank, so do not spend an embedding request on the query.
        return {"message": "No relevant documents found."}

    query_vector = embeddings.embed_query(query)

    # doc_vectors is index-aligned with docs; keep the vectors whose document
    # survived the pre-filter (same order as filtered_docs).
    filtered_doc_vectors = [
        doc_vector
        for doc_vector, doc in zip(doc_vectors, docs)
        if doc in filtered_docs
    ]
    similarities = cosine_similarity([query_vector], filtered_doc_vectors)[0]
    ranked_docs = sorted(
        zip(similarities, filtered_docs),
        key=lambda pair: pair[0],
        reverse=True,
    )
    # float() turns the NumPy scalar into a plain Python float for the response.
    return [
        {"score": float(score), "content": doc.metadata}
        for score, doc in ranked_docs
    ]
241
+
242
+
243
+
244
+