karthikvarunn committed on
Commit
2303e92
·
verified ·
1 Parent(s): c724c15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -57
app.py CHANGED
@@ -107,17 +107,10 @@ def complete_workflow(query):
107
  try:
108
  context_data = search_documents(query)
109
 
110
- # print("Context Data")
111
-
112
- # [print(doc) for doc in context_data]
113
-
114
  reranked = rerank(query, context_data)
115
 
116
  context_data= []
117
-
118
- # print("\n\n reranked data")
119
- # print(reranked.data)
120
-
121
  for i, entry in enumerate(reranked.data): # Access the 'data' attribute
122
  context_data.append({
123
  'chunk_id': entry['document']['chunk_id'],
@@ -127,10 +120,6 @@ def complete_workflow(query):
127
  'page_number': str(entry['document']['page_number']),
128
  'score': str(entry['score'])
129
  })
130
-
131
- # print("\n\n New Context Data")
132
- # [print(doc) for doc in context_data]
133
-
134
 
135
  document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
136
  formatted_titles = " " + "\n".join(document_titles)
@@ -158,51 +147,6 @@ def complete_workflow(query):
158
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
159
 
160
 
161
- async def async_complete_workflow(query):
162
- try:
163
- search_task = asyncio.to_thread(search_documents, query)
164
- context_data = await search_task # Run search in parallel
165
-
166
- rerank_task = asyncio.to_thread(rerank, query, context_data)
167
- reranked = await rerank_task # Run rerank in parallel
168
-
169
- # Process results
170
- context_data = [
171
- {
172
- 'chunk_id': entry['document']['chunk_id'],
173
- 'doc_id': entry['document']['doc_id'],
174
- 'title': entry['document']['title'],
175
- 'text': entry['document']['text'],
176
- 'page_number': str(entry['document']['page_number']),
177
- 'score': str(entry['score'])
178
- }
179
- for entry in reranked.data
180
- ]
181
-
182
- # Generate responses
183
- output_tasks = [asyncio.to_thread(generate_output, doc["text"], query) for doc in context_data]
184
- outputs = await asyncio.gather(*output_tasks) # Run LLM calls in parallel
185
-
186
- results = {
187
- "results": [
188
- {
189
- "natural_language_output": outputs[i],
190
- "chunk_id": doc["chunk_id"],
191
- "document_id": doc["doc_id"],
192
- "title": doc["title"],
193
- "text": doc["text"],
194
- "page_number": doc["page_number"],
195
- "score": doc["score"],
196
- }
197
- for i, doc in enumerate(context_data)
198
- ],
199
- "total_results": len(context_data)
200
- }
201
-
202
- return results
203
- except Exception as e:
204
- return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
205
-
206
  def gradio_app():
207
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
208
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")
 
107
  try:
108
  context_data = search_documents(query)
109
 
 
 
 
 
110
  reranked = rerank(query, context_data)
111
 
112
  context_data= []
113
+
 
 
 
114
  for i, entry in enumerate(reranked.data): # Access the 'data' attribute
115
  context_data.append({
116
  'chunk_id': entry['document']['chunk_id'],
 
120
  'page_number': str(entry['document']['page_number']),
121
  'score': str(entry['score'])
122
  })
 
 
 
 
123
 
124
  document_titles = list({os.path.basename(doc["title"]) for doc in context_data}) # Get only file names
125
  formatted_titles = " " + "\n".join(document_titles)
 
147
  return {"results": [], "total_results": 0}, f"Error in workflow: {str(e)}"
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def gradio_app():
151
  with gr.Blocks(css=".result-output {width: 150%; font-size: 16px; padding: 10px;}") as app:
152
  gr.Markdown("### Intelligent Document Search Prototype-v0.1.2 ")