Kevin Hu
committed on
Commit
·
8db6538
1
Parent(s):
8b00b96
Support debug components. (#3994)
Browse files
### What problem does this PR solve?
#3993
### Type of change
- [x] Bug Fix (non-breaking change which fixes an issue)
- agent/canvas.py +2 -1
- agent/component/base.py +14 -5
- agent/component/begin.py +1 -1
- agent/component/generate.py +15 -3
- api/apps/canvas_app.py +26 -2
- api/apps/conversation_app.py +2 -0
- api/apps/document_app.py +1 -1
- rag/svr/task_executor.py +2 -2
agent/canvas.py
CHANGED
@@ -211,6 +211,7 @@ class Canvas(ABC):
|
|
211 |
except Exception as e:
|
212 |
logging.exception(f"Canvas.run got exception: {e}")
|
213 |
self.path[-1].append(c)
|
|
|
214 |
raise e
|
215 |
self.path[-1].append(c)
|
216 |
ran += 1
|
@@ -330,4 +331,4 @@ class Canvas(ABC):
|
|
330 |
return self.components["begin"]["obj"]._param.query
|
331 |
|
332 |
def get_component_input_elements(self, cpnnm):
|
333 |
-
return self.components[
|
|
|
211 |
except Exception as e:
|
212 |
logging.exception(f"Canvas.run got exception: {e}")
|
213 |
self.path[-1].append(c)
|
214 |
+
ran += 1
|
215 |
raise e
|
216 |
self.path[-1].append(c)
|
217 |
ran += 1
|
|
|
331 |
return self.components["begin"]["obj"]._param.query
|
332 |
|
333 |
def get_component_input_elements(self, cpnnm):
|
334 |
+
return self.components[cpnnm]["obj"].get_input_elements()
|
agent/component/base.py
CHANGED
@@ -37,6 +37,7 @@ class ComponentParamBase(ABC):
|
|
37 |
self.message_history_window_size = 22
|
38 |
self.query = []
|
39 |
self.inputs = []
|
|
|
40 |
|
41 |
def set_name(self, name: str):
|
42 |
self._name = name
|
@@ -410,6 +411,7 @@ class ComponentBase(ABC):
|
|
410 |
def run(self, history, **kwargs):
|
411 |
logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
|
412 |
json.dumps(kwargs, ensure_ascii=False)))
|
|
|
413 |
try:
|
414 |
res = self._run(history, **kwargs)
|
415 |
self.set_output(res)
|
@@ -446,10 +448,13 @@ class ComponentBase(ABC):
|
|
446 |
setattr(self._param, self._param.output_var_name, None)
|
447 |
self._param.inputs = []
|
448 |
|
449 |
-
def set_output(self, v
|
450 |
setattr(self._param, self._param.output_var_name, v)
|
451 |
|
452 |
def get_input(self):
|
|
|
|
|
|
|
453 |
reversed_cpnts = []
|
454 |
if len(self._canvas.path) > 1:
|
455 |
reversed_cpnts.extend(self._canvas.path[-2])
|
@@ -531,14 +536,15 @@ class ComponentBase(ABC):
|
|
531 |
eles = []
|
532 |
for q in self._param.query:
|
533 |
if q.get("component_id"):
|
534 |
-
|
535 |
-
|
|
|
536 |
eles.extend(self._canvas.get_component(cpn_id)["obj"]._param.query)
|
537 |
continue
|
538 |
|
539 |
-
eles.append({"
|
540 |
else:
|
541 |
-
eles.append({"key": q["
|
542 |
return eles
|
543 |
|
544 |
def get_stream_input(self):
|
@@ -558,3 +564,6 @@ class ComponentBase(ABC):
|
|
558 |
|
559 |
def get_component_name(self, cpn_id):
|
560 |
return self._canvas.get_component(cpn_id)["obj"].component_name.lower()
|
|
|
|
|
|
|
|
37 |
self.message_history_window_size = 22
|
38 |
self.query = []
|
39 |
self.inputs = []
|
40 |
+
self.debug_inputs = []
|
41 |
|
42 |
def set_name(self, name: str):
|
43 |
self._name = name
|
|
|
411 |
def run(self, history, **kwargs):
|
412 |
logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
|
413 |
json.dumps(kwargs, ensure_ascii=False)))
|
414 |
+
self._param.debug_inputs = []
|
415 |
try:
|
416 |
res = self._run(history, **kwargs)
|
417 |
self.set_output(res)
|
|
|
448 |
setattr(self._param, self._param.output_var_name, None)
|
449 |
self._param.inputs = []
|
450 |
|
451 |
+
def set_output(self, v):
|
452 |
setattr(self._param, self._param.output_var_name, v)
|
453 |
|
454 |
def get_input(self):
|
455 |
+
if self._param.debug_inputs:
|
456 |
+
return pd.DataFrame([{"content": v["value"]} for v in self._param.debug_inputs])
|
457 |
+
|
458 |
reversed_cpnts = []
|
459 |
if len(self._canvas.path) > 1:
|
460 |
reversed_cpnts.extend(self._canvas.path[-2])
|
|
|
536 |
eles = []
|
537 |
for q in self._param.query:
|
538 |
if q.get("component_id"):
|
539 |
+
cpn_id = q["component_id"]
|
540 |
+
if cpn_id.split("@")[0].lower().find("begin") >= 0:
|
541 |
+
cpn_id, key = cpn_id.split("@")
|
542 |
eles.extend(self._canvas.get_component(cpn_id)["obj"]._param.query)
|
543 |
continue
|
544 |
|
545 |
+
eles.append({"name": self._canvas.get_compnent_name(cpn_id), "key": cpn_id})
|
546 |
else:
|
547 |
+
eles.append({"key": q["value"], "name": q["value"], "value": q["value"]})
|
548 |
return eles
|
549 |
|
550 |
def get_stream_input(self):
|
|
|
564 |
|
565 |
def get_component_name(self, cpn_id):
|
566 |
return self._canvas.get_component(cpn_id)["obj"].component_name.lower()
|
567 |
+
|
568 |
+
def debug(self, **kwargs):
|
569 |
+
return self._run([], **kwargs)
|
agent/component/begin.py
CHANGED
@@ -43,7 +43,7 @@ class Begin(ComponentBase):
|
|
43 |
def stream_output(self):
|
44 |
res = {"content": self._param.prologue}
|
45 |
yield res
|
46 |
-
self.set_output(res)
|
47 |
|
48 |
|
49 |
|
|
|
43 |
def stream_output(self):
|
44 |
res = {"content": self._param.prologue}
|
45 |
yield res
|
46 |
+
self.set_output(self.be_output(res))
|
47 |
|
48 |
|
49 |
|
agent/component/generate.py
CHANGED
@@ -111,9 +111,9 @@ class Generate(ComponentBase):
|
|
111 |
|
112 |
def get_input_elements(self):
|
113 |
if self._param.parameters:
|
114 |
-
return self._param.parameters
|
115 |
|
116 |
-
return [{"key": "
|
117 |
|
118 |
def _run(self, history, **kwargs):
|
119 |
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
@@ -218,4 +218,16 @@ class Generate(ComponentBase):
|
|
218 |
res = self.set_cite(retrieval_res, answer)
|
219 |
yield res
|
220 |
|
221 |
-
self.set_output(res)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
112 |
def get_input_elements(self):
|
113 |
if self._param.parameters:
|
114 |
+
return [{"key": "user"}, *self._param.parameters]
|
115 |
|
116 |
+
return [{"key": "user"}]
|
117 |
|
118 |
def _run(self, history, **kwargs):
|
119 |
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
|
|
218 |
res = self.set_cite(retrieval_res, answer)
|
219 |
yield res
|
220 |
|
221 |
+
self.set_output(Generate.be_output(res))
|
222 |
+
|
223 |
+
def debug(self, history, **kwargs):
|
224 |
+
chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
|
225 |
+
prompt = self._param.prompt
|
226 |
+
|
227 |
+
for para in self._param.debug_inputs:
|
228 |
+
kwargs[para["key"]] = para["value"]
|
229 |
+
|
230 |
+
for n, v in kwargs.items():
|
231 |
+
prompt = re.sub(r"\{%s\}" % re.escape(n), str(v).replace("\\", " "), prompt)
|
232 |
+
|
233 |
+
return chat_mdl.chat(prompt, [{"role": "user", "content": kwargs.get("user", "")}], self._param.gen_conf())
|
api/apps/canvas_app.py
CHANGED
@@ -187,10 +187,32 @@ def reset():
|
|
187 |
|
188 |
|
189 |
@manager.route('/input_elements', methods=['GET']) # noqa: F821
|
190 |
-
@validate_request("id", "component_id")
|
191 |
@login_required
|
192 |
def input_elements():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
193 |
req = request.json
|
|
|
|
|
194 |
try:
|
195 |
e, user_canvas = UserCanvasService.get_by_id(req["id"])
|
196 |
if not e:
|
@@ -201,7 +223,9 @@ def input_elements():
|
|
201 |
code=RetCode.OPERATING_ERROR)
|
202 |
|
203 |
canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
|
204 |
-
|
|
|
|
|
205 |
except Exception as e:
|
206 |
return server_error_response(e)
|
207 |
|
|
|
187 |
|
188 |
|
189 |
@manager.route('/input_elements', methods=['GET']) # noqa: F821
|
|
|
190 |
@login_required
|
191 |
def input_elements():
|
192 |
+
cvs_id = request.args.get("id")
|
193 |
+
cpn_id = request.args.get("component_id")
|
194 |
+
try:
|
195 |
+
e, user_canvas = UserCanvasService.get_by_id(cvs_id)
|
196 |
+
if not e:
|
197 |
+
return get_data_error_result(message="canvas not found.")
|
198 |
+
if not UserCanvasService.query(user_id=current_user.id, id=cvs_id):
|
199 |
+
return get_json_result(
|
200 |
+
data=False, message='Only owner of canvas authorized for this operation.',
|
201 |
+
code=RetCode.OPERATING_ERROR)
|
202 |
+
|
203 |
+
canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
|
204 |
+
return get_json_result(data=canvas.get_component_input_elements(cpn_id))
|
205 |
+
except Exception as e:
|
206 |
+
return server_error_response(e)
|
207 |
+
|
208 |
+
|
209 |
+
@manager.route('/debug', methods=['POST']) # noqa: F821
|
210 |
+
@validate_request("id", "component_id", "params")
|
211 |
+
@login_required
|
212 |
+
def debug():
|
213 |
req = request.json
|
214 |
+
for p in req["params"]:
|
215 |
+
assert p.get("key")
|
216 |
try:
|
217 |
e, user_canvas = UserCanvasService.get_by_id(req["id"])
|
218 |
if not e:
|
|
|
223 |
code=RetCode.OPERATING_ERROR)
|
224 |
|
225 |
canvas = Canvas(json.dumps(user_canvas.dsl), current_user.id)
|
226 |
+
canvas.get_component(req["component_id"])["obj"]._param.debug_inputs = req["params"]
|
227 |
+
df = canvas.get_component(req["component_id"])["obj"].debug()
|
228 |
+
return get_json_result(data=df.to_dict(orient="records"))
|
229 |
except Exception as e:
|
230 |
return server_error_response(e)
|
231 |
|
api/apps/conversation_app.py
CHANGED
@@ -95,6 +95,8 @@ def get():
|
|
95 |
return d.get(k1, d.get(k2))
|
96 |
|
97 |
for ref in conv.reference:
|
|
|
|
|
98 |
ref["chunks"] = [{
|
99 |
"id": get_value(ck, "chunk_id", "id"),
|
100 |
"content": get_value(ck, "content", "content_with_weight"),
|
|
|
95 |
return d.get(k1, d.get(k2))
|
96 |
|
97 |
for ref in conv.reference:
|
98 |
+
if isinstance(ref, list):
|
99 |
+
continue
|
100 |
ref["chunks"] = [{
|
101 |
"id": get_value(ck, "chunk_id", "id"),
|
102 |
"content": get_value(ck, "content", "content_with_weight"),
|
api/apps/document_app.py
CHANGED
@@ -552,7 +552,7 @@ def parse():
|
|
552 |
})
|
553 |
driver = Chrome(options=options)
|
554 |
driver.get(url)
|
555 |
-
res_headers = [r.response.headers for r in driver.requests]
|
556 |
if len(res_headers) > 1:
|
557 |
sections = RAGFlowHtmlParser().parser_txt(driver.page_source)
|
558 |
driver.quit()
|
|
|
552 |
})
|
553 |
driver = Chrome(options=options)
|
554 |
driver.get(url)
|
555 |
+
res_headers = [r.response.headers for r in driver.requests if r and r.response]
|
556 |
if len(res_headers) > 1:
|
557 |
sections = RAGFlowHtmlParser().parser_txt(driver.page_source)
|
558 |
driver.quit()
|
rag/svr/task_executor.py
CHANGED
@@ -54,7 +54,7 @@ from rag.app import laws, paper, presentation, manual, qa, table, book, resume,
|
|
54 |
from rag.nlp import search, rag_tokenizer
|
55 |
from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
|
56 |
from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME, print_rag_settings
|
57 |
-
from rag.utils import
|
58 |
from rag.utils.redis_conn import REDIS_CONN, Payload
|
59 |
from rag.utils.storage_factory import STORAGE_IMPL
|
60 |
|
@@ -269,7 +269,7 @@ def embedding(docs, mdl, parser_config=None, callback=None):
|
|
269 |
batch_size = 16
|
270 |
tts, cnts = [], []
|
271 |
for d in docs:
|
272 |
-
tts.append(
|
273 |
c = "\n".join(d.get("question_kwd", []))
|
274 |
if not c:
|
275 |
c = d["content_with_weight"]
|
|
|
54 |
from rag.nlp import search, rag_tokenizer
|
55 |
from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor
|
56 |
from rag.settings import DOC_MAXIMUM_SIZE, SVR_QUEUE_NAME, print_rag_settings
|
57 |
+
from rag.utils import num_tokens_from_string
|
58 |
from rag.utils.redis_conn import REDIS_CONN, Payload
|
59 |
from rag.utils.storage_factory import STORAGE_IMPL
|
60 |
|
|
|
269 |
batch_size = 16
|
270 |
tts, cnts = [], []
|
271 |
for d in docs:
|
272 |
+
tts.append(d.get("docnm_kwd", "Title"))
|
273 |
c = "\n".join(d.get("question_kwd", []))
|
274 |
if not c:
|
275 |
c = d["content_with_weight"]
|