Spaces:
Running
Running
Didier Guillevic
committed on
Commit
·
ee15f4a
1
Parent(s):
578247a
Add relevance scores
Browse files
app.py
CHANGED
@@ -91,17 +91,21 @@ def search_table(
|
|
91 |
results = (
|
92 |
table.search(query, query_type=query_type)
|
93 |
.where(f"creation_date >= '{filter_year}'", prefilter=True)
|
94 |
-
.rerank(reranker=reranker)
|
95 |
.limit(top_k * overfetch_factor)
|
96 |
-
.
|
|
|
|
|
|
|
97 |
)
|
98 |
elif query_type == "hybrid":
|
99 |
results = (
|
100 |
table.search(query, query_type=query_type)
|
101 |
.where(f"creation_date >= '{filter_year}'", prefilter=True)
|
|
|
102 |
.rerank(reranker=reranker)
|
103 |
.limit(top_k)
|
104 |
-
.
|
|
|
105 |
)
|
106 |
|
107 |
return results[:top_k]
|
@@ -111,13 +115,19 @@ def search_table(
|
|
111 |
# Generation: query + context --> response
|
112 |
#
|
113 |
|
114 |
-
def create_bulleted_list(texts: list[str]) -> str:
|
115 |
"""
|
116 |
This function takes a list of strings and returns HTML with a bulleted list.
|
117 |
"""
|
118 |
html_items = []
|
119 |
-
|
120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
return "<ul>" + "".join(html_items) + "</ul>"
|
122 |
|
123 |
|
@@ -125,7 +135,8 @@ def generate_response(
|
|
125 |
query: str,
|
126 |
query_type: str,
|
127 |
reranker_name: str,
|
128 |
-
filter_year: int
|
|
|
129 |
) -> list[str, str, str]:
|
130 |
"""Generate a response given query, search type and reranker.
|
131 |
|
@@ -142,14 +153,16 @@ def generate_response(
|
|
142 |
query=query,
|
143 |
query_type=query_type,
|
144 |
reranker_name=reranker_name,
|
145 |
-
filter_year=filter_year
|
|
|
146 |
)
|
147 |
|
148 |
-
references = [result
|
149 |
references_html = "<h4>References</h4>\n" + create_bulleted_list(references)
|
150 |
|
151 |
-
snippets = [result
|
152 |
-
|
|
|
153 |
|
154 |
# Generate the response from the LLM
|
155 |
stream_reponse = llm_utils.generate_chat_response_streaming(
|
@@ -209,6 +222,10 @@ with gr.Blocks() as demo:
|
|
209 |
minimum=2005, maximum=2020, value=2005, step=1,
|
210 |
label='Creation date >=', render=False
|
211 |
)
|
|
|
|
|
|
|
|
|
212 |
|
213 |
with gr.Row():
|
214 |
# Example questions given default provided PDF file
|
@@ -224,7 +241,7 @@ with gr.Blocks() as demo:
|
|
224 |
['What are the "Ten Global Principles" for fighting tax crime?',],
|
225 |
["What are some recent developments in the fight against offshore tax evasion?",],
|
226 |
],
|
227 |
-
inputs=[question, query_type, reranker_name, filter_year],
|
228 |
outputs=[response, references, snippets],
|
229 |
fn=generate_response,
|
230 |
cache_examples=False,
|
@@ -237,6 +254,7 @@ with gr.Blocks() as demo:
|
|
237 |
query_type.render()
|
238 |
reranker_name.render()
|
239 |
filter_year.render()
|
|
|
240 |
|
241 |
# Documentation
|
242 |
with gr.Accordion("Documentation", open=False):
|
@@ -256,7 +274,7 @@ with gr.Blocks() as demo:
|
|
256 |
# Click actions
|
257 |
response_button.click(
|
258 |
fn=generate_response,
|
259 |
-
inputs=[question, query_type, reranker_name, filter_year],
|
260 |
outputs=[response, references, snippets]
|
261 |
)
|
262 |
clear_button.click(
|
|
|
91 |
results = (
|
92 |
table.search(query, query_type=query_type)
|
93 |
.where(f"creation_date >= '{filter_year}'", prefilter=True)
|
|
|
94 |
.limit(top_k * overfetch_factor)
|
95 |
+
.rerank(reranker=reranker)
|
96 |
+
.limit(top_k)
|
97 |
+
.to_list() # to get access to '_relevance_score'
|
98 |
+
#.to_pydantic(Document)
|
99 |
)
|
100 |
elif query_type == "hybrid":
|
101 |
results = (
|
102 |
table.search(query, query_type=query_type)
|
103 |
.where(f"creation_date >= '{filter_year}'", prefilter=True)
|
104 |
+
.limit(top_k * overfetch_factor)
|
105 |
.rerank(reranker=reranker)
|
106 |
.limit(top_k)
|
107 |
+
.to_list() # to get access to '_relevance_score'
|
108 |
+
#.to_pydantic(Document)
|
109 |
)
|
110 |
|
111 |
return results[:top_k]
|
|
|
115 |
# Generation: query + context --> response
|
116 |
#
|
117 |
|
118 |
+
def create_bulleted_list(
    texts: list[str],
    scores: "list[float] | None" = None,
) -> str:
    """Render a list of strings as an HTML unordered list.

    Each item is HTML-escaped before being wrapped in ``<li>`` so that
    characters such as ``<``, ``>`` and ``&`` coming from document text or
    file names are displayed literally instead of being parsed as markup.

    Args:
        texts: The strings to display, one per bullet.
        scores: Optional scores paired positionally with ``texts``. When
            given, each bullet is prefixed with ``(Score=<score>)`` formatted
            to two decimals. If the two lists differ in length, items beyond
            the shorter one are dropped (``zip`` semantics).

    Returns:
        A ``<ul>...</ul>`` HTML string; ``"<ul></ul>"`` when there is nothing
        to list.
    """
    # Local import keeps this block self-contained; the module's import
    # section is not visible from here.
    from html import escape

    if scores is None:
        html_items = [f"<li>{escape(str(text))}</li>" for text in texts]
    else:
        html_items = [
            f"<li>(Score={score:.2f})\t{escape(str(text))}</li>"
            for text, score in zip(texts, scores)
        ]

    return "<ul>" + "".join(html_items) + "</ul>"
|
132 |
|
133 |
|
|
|
135 |
query: str,
|
136 |
query_type: str,
|
137 |
reranker_name: str,
|
138 |
+
filter_year: int,
|
139 |
+
top_k: int
|
140 |
) -> list[str, str, str]:
|
141 |
"""Generate a response given query, search type and reranker.
|
142 |
|
|
|
153 |
query=query,
|
154 |
query_type=query_type,
|
155 |
reranker_name=reranker_name,
|
156 |
+
filter_year=filter_year,
|
157 |
+
top_k=top_k
|
158 |
)
|
159 |
|
160 |
+
references = [result['file_name'] for result in results]
|
161 |
references_html = "<h4>References</h4>\n" + create_bulleted_list(references)
|
162 |
|
163 |
+
snippets = [result['text'] for result in results]
|
164 |
+
scores = [result['_relevance_score'] for result in results]
|
165 |
+
snippets_html = "<h4>Snippets</h4>\n" + create_bulleted_list(snippets, scores)
|
166 |
|
167 |
# Generate the response from the LLM
|
168 |
stream_reponse = llm_utils.generate_chat_response_streaming(
|
|
|
222 |
minimum=2005, maximum=2020, value=2005, step=1,
|
223 |
label='Creation date >=', render=False
|
224 |
)
|
225 |
+
top_k = gr.Slider(
|
226 |
+
minimum=2, maximum=10, value=5, step=1,
|
227 |
+
label='Top k result', render=False
|
228 |
+
)
|
229 |
|
230 |
with gr.Row():
|
231 |
# Example questions given default provided PDF file
|
|
|
241 |
['What are the "Ten Global Principles" for fighting tax crime?',],
|
242 |
["What are some recent developments in the fight against offshore tax evasion?",],
|
243 |
],
|
244 |
+
inputs=[question, query_type, reranker_name, filter_year, top_k],
|
245 |
outputs=[response, references, snippets],
|
246 |
fn=generate_response,
|
247 |
cache_examples=False,
|
|
|
254 |
query_type.render()
|
255 |
reranker_name.render()
|
256 |
filter_year.render()
|
257 |
+
top_k.render()
|
258 |
|
259 |
# Documentation
|
260 |
with gr.Accordion("Documentation", open=False):
|
|
|
274 |
# Click actions
|
275 |
response_button.click(
|
276 |
fn=generate_response,
|
277 |
+
inputs=[question, query_type, reranker_name, filter_year, top_k],
|
278 |
outputs=[response, references, snippets]
|
279 |
)
|
280 |
clear_button.click(
|