Didier Guillevic committed on
Commit
ee15f4a
·
1 Parent(s): 578247a

Add relevance scores

Browse files
Files changed (1) hide show
  1. app.py +31 -13
app.py CHANGED
@@ -91,17 +91,21 @@ def search_table(
91
  results = (
92
  table.search(query, query_type=query_type)
93
  .where(f"creation_date >= '{filter_year}'", prefilter=True)
94
- .rerank(reranker=reranker)
95
  .limit(top_k * overfetch_factor)
96
- .to_pydantic(Document)
 
 
 
97
  )
98
  elif query_type == "hybrid":
99
  results = (
100
  table.search(query, query_type=query_type)
101
  .where(f"creation_date >= '{filter_year}'", prefilter=True)
 
102
  .rerank(reranker=reranker)
103
  .limit(top_k)
104
- .to_pydantic(Document)
 
105
  )
106
 
107
  return results[:top_k]
@@ -111,13 +115,19 @@ def search_table(
111
  # Generation: query + context --> response
112
  #
113
 
114
- def create_bulleted_list(texts: list[str]) -> str:
115
  """
116
  This function takes a list of strings and returns HTML with a bulleted list.
117
  """
118
  html_items = []
119
- for item in texts:
120
- html_items.append(f"<li>{item}</li>")
 
 
 
 
 
 
121
  return "<ul>" + "".join(html_items) + "</ul>"
122
 
123
 
@@ -125,7 +135,8 @@ def generate_response(
125
  query: str,
126
  query_type: str,
127
  reranker_name: str,
128
- filter_year: int
 
129
  ) -> list[str, str, str]:
130
  """Generate a response given query, search type and reranker.
131
 
@@ -142,14 +153,16 @@ def generate_response(
142
  query=query,
143
  query_type=query_type,
144
  reranker_name=reranker_name,
145
- filter_year=filter_year
 
146
  )
147
 
148
- references = [result.file_name for result in results]
149
  references_html = "<h4>References</h4>\n" + create_bulleted_list(references)
150
 
151
- snippets = [result.text for result in results]
152
- snippets_html = "<h4>Snippets</h4>\n" + create_bulleted_list(snippets)
 
153
 
154
  # Generate the response from the LLM
155
  stream_reponse = llm_utils.generate_chat_response_streaming(
@@ -209,6 +222,10 @@ with gr.Blocks() as demo:
209
  minimum=2005, maximum=2020, value=2005, step=1,
210
  label='Creation date >=', render=False
211
  )
 
 
 
 
212
 
213
  with gr.Row():
214
  # Example questions given default provided PDF file
@@ -224,7 +241,7 @@ with gr.Blocks() as demo:
224
  ['What are the "Ten Global Principles" for fighting tax crime?',],
225
  ["What are some recent developments in the fight against offshore tax evasion?",],
226
  ],
227
- inputs=[question, query_type, reranker_name, filter_year],
228
  outputs=[response, references, snippets],
229
  fn=generate_response,
230
  cache_examples=False,
@@ -237,6 +254,7 @@ with gr.Blocks() as demo:
237
  query_type.render()
238
  reranker_name.render()
239
  filter_year.render()
 
240
 
241
  # Documentation
242
  with gr.Accordion("Documentation", open=False):
@@ -256,7 +274,7 @@ with gr.Blocks() as demo:
256
  # Click actions
257
  response_button.click(
258
  fn=generate_response,
259
- inputs=[question, query_type, reranker_name, filter_year],
260
  outputs=[response, references, snippets]
261
  )
262
  clear_button.click(
 
91
  results = (
92
  table.search(query, query_type=query_type)
93
  .where(f"creation_date >= '{filter_year}'", prefilter=True)
 
94
  .limit(top_k * overfetch_factor)
95
+ .rerank(reranker=reranker)
96
+ .limit(top_k)
97
+ .to_list() # to get access to '_relevance_score'
98
+ #.to_pydantic(Document)
99
  )
100
  elif query_type == "hybrid":
101
  results = (
102
  table.search(query, query_type=query_type)
103
  .where(f"creation_date >= '{filter_year}'", prefilter=True)
104
+ .limit(top_k * overfetch_factor)
105
  .rerank(reranker=reranker)
106
  .limit(top_k)
107
+ .to_list() # to get access to '_relevance_score'
108
+ #.to_pydantic(Document)
109
  )
110
 
111
  return results[:top_k]
 
115
  # Generation: query + context --> response
116
  #
117
 
118
+ def create_bulleted_list(texts: list[str], scores: list[float]=None) -> str:
119
  """
120
  This function takes a list of strings and returns HTML with a bulleted list.
121
  """
122
  html_items = []
123
+
124
+ if scores is not None:
125
+ for text, score in zip(texts, scores):
126
+ html_items.append(f"<li>(Score={score:.2f})\t{text}</li>")
127
+ else:
128
+ for text in texts:
129
+ html_items.append(f"<li>{text}</li>")
130
+
131
  return "<ul>" + "".join(html_items) + "</ul>"
132
 
133
 
 
135
  query: str,
136
  query_type: str,
137
  reranker_name: str,
138
+ filter_year: int,
139
+ top_k: int
140
  ) -> list[str, str, str]:
141
  """Generate a response given query, search type and reranker.
142
 
 
153
  query=query,
154
  query_type=query_type,
155
  reranker_name=reranker_name,
156
+ filter_year=filter_year,
157
+ top_k=top_k
158
  )
159
 
160
+ references = [result['file_name'] for result in results]
161
  references_html = "<h4>References</h4>\n" + create_bulleted_list(references)
162
 
163
+ snippets = [result['text'] for result in results]
164
+ scores = [result['_relevance_score'] for result in results]
165
+ snippets_html = "<h4>Snippets</h4>\n" + create_bulleted_list(snippets, scores)
166
 
167
  # Generate the response from the LLM
168
  stream_reponse = llm_utils.generate_chat_response_streaming(
 
222
  minimum=2005, maximum=2020, value=2005, step=1,
223
  label='Creation date >=', render=False
224
  )
225
+ top_k = gr.Slider(
226
+ minimum=2, maximum=10, value=5, step=1,
227
+ label='Top k result', render=False
228
+ )
229
 
230
  with gr.Row():
231
  # Example questions given default provided PDF file
 
241
  ['What are the "Ten Global Principles" for fighting tax crime?',],
242
  ["What are some recent developments in the fight against offshore tax evasion?",],
243
  ],
244
+ inputs=[question, query_type, reranker_name, filter_year, top_k],
245
  outputs=[response, references, snippets],
246
  fn=generate_response,
247
  cache_examples=False,
 
254
  query_type.render()
255
  reranker_name.render()
256
  filter_year.render()
257
+ top_k.render()
258
 
259
  # Documentation
260
  with gr.Accordion("Documentation", open=False):
 
274
  # Click actions
275
  response_button.click(
276
  fn=generate_response,
277
+ inputs=[question, query_type, reranker_name, filter_year, top_k],
278
  outputs=[response, references, snippets]
279
  )
280
  clear_button.click(