Kevin Hu committed on
Commit
8db6538
·
1 Parent(s): 8b00b96

Support debug components. (#3994)

Browse files

### What problem does this PR solve?

#3993

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

agent/canvas.py CHANGED
@@ -211,6 +211,7 @@ class Canvas(ABC):
211
  except Exception as e:
212
  logging.exception(f"Canvas.run got exception: {e}")
213
  self.path[-1].append(c)
 
214
  raise e
215
  self.path[-1].append(c)
216
  ran += 1
@@ -330,4 +331,4 @@ class Canvas(ABC):
330
  return self.components["begin"]["obj"]._param.query
331
 
332
  def get_component_input_elements(self, cpnnm):
333
- return self.components["begin"]["obj"].get_input_elements()
 
211
  except Exception as e:
212
  logging.exception(f"Canvas.run got exception: {e}")
213
  self.path[-1].append(c)
214
+ ran += 1
215
  raise e
216
  self.path[-1].append(c)
217
  ran += 1
 
331
  return self.components["begin"]["obj"]._param.query
332
 
333
  def get_component_input_elements(self, cpnnm):
334
+ return self.components[cpnnm]["obj"].get_input_elements()
agent/component/base.py CHANGED
@@ -37,6 +37,7 @@ class ComponentParamBase(ABC):
37
  self.message_history_window_size = 22
38
  self.query = []
39
  self.inputs = []
 
40
 
41
  def set_name(self, name: str):
42
  self._name = name
@@ -410,6 +411,7 @@ class ComponentBase(ABC):
410
  def run(self, history, **kwargs):
411
  logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
412
  json.dumps(kwargs, ensure_ascii=False)))
 
413
  try:
414
  res = self._run(history, **kwargs)
415
  self.set_output(res)
@@ -446,10 +448,13 @@ class ComponentBase(ABC):
446
  setattr(self._param, self._param.output_var_name, None)
447
  self._param.inputs = []
448
 
449
- def set_output(self, v: partial | pd.DataFrame):
450
  setattr(self._param, self._param.output_var_name, v)
451
 
452
  def get_input(self):
 
 
 
453
  reversed_cpnts = []
454
  if len(self._canvas.path) > 1:
455
  reversed_cpnts.extend(self._canvas.path[-2])
@@ -531,14 +536,15 @@ class ComponentBase(ABC):
531
  eles = []
532
  for q in self._param.query:
533
  if q.get("component_id"):
534
- if q["component_id"].split("@")[0].lower().find("begin") >= 0:
535
- cpn_id, key = q["component_id"].split("@")
 
536
  eles.extend(self._canvas.get_component(cpn_id)["obj"]._param.query)
537
  continue
538
 
539
- eles.append({"key": q["key"], "component_id": q["component_id"]})
540
  else:
541
- eles.append({"key": q["key"]})
542
  return eles
543
 
544
  def get_stream_input(self):
@@ -558,3 +564,6 @@ class ComponentBase(ABC):
558
 
559
  def get_component_name(self, cpn_id):
560
  return self._canvas.get_component(cpn_id)["obj"].component_name.lower()
 
 
 
 
37
  self.message_history_window_size = 22
38
  self.query = []
39
  self.inputs = []
40
+ self.debug_inputs = []
41
 
42
  def set_name(self, name: str):
43
  self._name = name
 
411
  def run(self, history, **kwargs):
412
  logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
413
  json.dumps(kwargs, ensure_ascii=False)))
414
+ self._param.debug_inputs = []
415
  try:
416
  res = self._run(history, **kwargs)
417
  self.set_output(res)
 
448
  setattr(self._param, self._param.output_var_name, None)
449
  self._param.inputs = []
450
 
451
+ def set_output(self, v):
452
  setattr(self._param, self._param.output_var_name, v)
453
 
454
  def get_input(self):
455
+ if self._param.debug_inputs:
456
+ return pd.DataFrame([{"content": v["value"]} for v in self._param.debug_inputs])
457
+
458
  reversed_cpnts = []
459
  if len(self._canvas.path) > 1:
460
  reversed_cpnts.extend(self._canvas.path[-2])
 
536
  eles = []
537
  for q in self._param.query:
538
  if q.get("component_id"):
539
+ cpn_id = q["component_id"]
540
+ if cpn_id.split("@")[0].lower().find("begin") >= 0:
541
+ cpn_id, key = cpn_id.split("@")
542
  eles.extend(self._canvas.get_component(cpn_id)["obj"]._param.query)
543
  continue
544
 
545
+ eles.append({"name": self._canvas.get_compnent_name(cpn_id), "key": cpn_id})
546
  else:
547
+ eles.append({"key": q["value"], "name": q["value"], "value": q["value"]})
548
  return eles
549
 
550
  def get_stream_input(self):
 
564
 
565
  def get_component_name(self, cpn_id):
566
  return self._canvas.get_component(cpn_id)["obj"].component_name.lower()
567
+
568
+ def debug(self, **kwargs):
569
+ return self._run([], **kwargs)
agent/component/begin.py CHANGED
@@ -43,7 +43,7 @@ class Begin(ComponentBase):
43
  def stream_output(self):
44
  res = {"content": self._param.prologue}
45
  yield res
46
- self.set_output(res)
47
 
48
 
49
 
 
43
  def stream_output(self):
44
  res = {"content": self._param.prologue}
45
  yield res
46
+ self.set_output(self.be_output(res))
47
 
48
 
49
 
agent/component/generate.py CHANGED
@@ -111,9 +111,9 @@ class Generate(ComponentBase):
111
 
112
  def get_input_elements(self):
113
  if self._param.parameters:
114
- return self._param.parameters
115
 
116
- return [{"key": "input"}]
117
 
118
  def _run(self, history, **kwargs):
119
  chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
@@ -218,4 +218,16 @@ class Generate(ComponentBase):
218
  res = self.set_cite(retrieval_res, answer)
219
  yield res
220
 
221
- self.set_output(res)
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
  def get_input_elements(self):
113
  if self._param.parameters:
114
+ return [{"key": "user"}, *self._param.parameters]
115
 
116
+ return [{"key": "user"}]
117
 
118
  def _run(self, history, **kwargs):
119
  chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
 
218
  res = self.set_cite(retrieval_res, answer)
219
  yield res
220
 
221
+ self.set_output(Generate.be_output(res))
222
+
223
+ def debug(self, history, **kwargs):
224
+ chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
225
+ prompt = self._param.prompt
226
+
227
+ for para in self._param.debug_inputs:
228
+ kwargs[para["key"]] = para["value"]
229
+
230
+ for n, v in kwargs.items():
231
+ prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
232
+
233
+ return chat_mdl.chat(prompt, [{"role": "user", "content": kwargs.get("user", "")}], self._param.gen_conf())
api/apps/canvas_app.py CHANGED
@@ -187,10 +187,32 @@ def reset():
187
 
188
 
189
  @manager.route('/input_elements', methods=['GET']) # noqa: F821
190
- @validate_request("id", "component_id")
191
  @login_required
192
  def input_elements():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  req = request.json
 
 
194
  try:
195
  e, user_canvas = UserCanvasService.get_by_id(req["id"])
196
  if not e:
@@ -201,7 +223,9 @@ def input_elements():
201
  code=RetCode.OPERATING_ERROR)
202
 
203
  canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
204
- return get_json_result(data=canvas.get_component_input_elements(req["component_id"]))
 
 
205
  except Exception as e:
206
  return server_error_response(e)
207
 
 
187
 
188
 
189
  @manager.route('/input_elements', methods=['GET']) # noqa: F821
 
190
  @login_required
191
  def input_elements():
192
+ cvs_id = request.args.get("id")
193
+ cpn_id = request.args.get("component_id")
194
+ try:
195
+ e, user_canvas = UserCanvasService.get_by_id(cvs_id)
196
+ if not e:
197
+ return get_data_error_result(message="canvas not found.")
198
+ if not UserCanvasService.query(user_id=current_user.id, id=cvs_id):
199
+ return get_json_result(
200
+ data=False, message='Only owner of canvas authorized for this operation.',
201
+ code=RetCode.OPERATING_ERROR)
202
+
203
+ canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
204
+ return get_json_result(data=canvas.get_component_input_elements(cpn_id))
205
+ except Exception as e:
206
+ return server_error_response(e)
207
+
208
+
209
+ @manager.route('/debug', methods=['POST']) # noqa: F821
210
+ @validate_request("id", "component_id", "params")
211
+ @login_required
212
+ def debug():
213
  req = request.json
214
+ for p in req["params"]:
215
+ assert p.get("key")
216
  try:
217
  e, user_canvas = UserCanvasService.get_by_id(req["id"])
218
  if not e:
 
223
  code=RetCode.OPERATING_ERROR)
224
 
225
  canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
226
+ canvas.get_component(req["component_id"])["obj"]._param.debug_inputs = req["params"]
227
+ df = canvas.get_component(req["component_id"])["obj"].debug()
228
+ return get_json_result(data=df.to_dict(orient="records"))
229
  except Exception as e:
230
  return server_error_response(e)
231
 
api/apps/conversation_app.py CHANGED
@@ -95,6 +95,8 @@ def get():
95
  return d.get(k1, d.get(k2))
96
 
97
  for ref in conv.reference:
 
 
98
  ref["chunks"] = [{
99
  "id": get_value(ck, "chunk_id", "id"),
100
  "content": get_value(ck, "content", "content_with_weight"),
 
95
  return d.get(k1, d.get(k2))
96
 
97
  for ref in conv.reference:
98
+ if isinstance(ref, list):
99
+ continue
100
  ref["chunks"] = [{
101
  "id": get_value(ck, "chunk_id", "id"),
102
  "content": get_value(ck, "content", "content_with_weight"),
api/apps/document_app.py CHANGED
@@ -552,7 +552,7 @@ def parse():
552
  })
553
  driver = Chrome(options=options)
554
  driver.get(url)
555
- res_headers = [r.response.headers for r in driver.requests]
556
  if len(res_headers) > 1:
557
  sections = RAGFlowHtmlParser().parser_txt(driver.page_source)
558
  driver.quit()
 
552
  })
553
  driver = Chrome(options=options)
554
  driver.get(url)
555
+ res_headers = [r.response.headers for r in driver.requests if r and r.response]
556
  if len(res_headers) > 1:
557
  sections = RAGFlowHtmlParser().parser_txt(driver.page_source)
558
  driver.quit()
rag/svr/task_executor.py CHANGED
@@ -54,7 +54,7 @@ from rag.app import laws, paper, presentation, manual, qa, table, book, resume,
54
  from rag.nlp import search, rag_tokenizer
55
  from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
56
  from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME, print_rag_settings
57
- from rag.utils import rmSpace, num_tokens_from_string
58
  from rag.utils.redis_conn import REDIS_CONN, Payload
59
  from rag.utils.storage_factory import STORAGE_IMPL
60
 
@@ -269,7 +269,7 @@ def embedding(docs, mdl, parser_config=None, callback=None):
269
  batch_size = 16
270
  tts, cnts = [], []
271
  for d in docs:
272
- tts.append(rmSpace(d.get("docnm_kwd", "Title")))
273
  c = "\n".join(d.get("question_kwd", []))
274
  if not c:
275
  c = d["content_with_weight"]
 
54
  from rag.nlp import search, rag_tokenizer
55
  from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
56
  from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME, print_rag_settings
57
+ from rag.utils import num_tokens_from_string
58
  from rag.utils.redis_conn import REDIS_CONN, Payload
59
  from rag.utils.storage_factory import STORAGE_IMPL
60
 
 
269
  batch_size = 16
270
  tts, cnts = [], []
271
  for d in docs:
272
+ tts.append(d.get("docnm_kwd", "Title"))
273
  c = "\n".join(d.get("question_kwd", []))
274
  if not c:
275
  c = d["content_with_weight"]