zhichyu committed on
Commit
8bc2fc9
·
1 Parent(s): c337e13

Use consistent log file names, introduced initLogger (#3403)

Browse files

### What problem does this PR solve?

Use consistent log file names and introduce initLogger.

### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [ ] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [x] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. agent/canvas.py +5 -5
  2. agent/component/arxiv.py +2 -2
  3. agent/component/baidu.py +2 -2
  4. agent/component/base.py +5 -5
  5. agent/component/bing.py +2 -2
  6. agent/component/categorize.py +2 -2
  7. agent/component/duckduckgo.py +2 -2
  8. agent/component/github.py +2 -2
  9. agent/component/google.py +2 -2
  10. agent/component/googlescholar.py +3 -3
  11. agent/component/keyword.py +2 -2
  12. agent/component/pubmed.py +2 -2
  13. agent/component/relevant.py +2 -2
  14. agent/component/retrieval.py +2 -2
  15. agent/component/rewrite.py +2 -2
  16. agent/component/wikipedia.py +2 -2
  17. agent/component/yahoofinance.py +2 -2
  18. api/apps/__init__.py +2 -2
  19. api/apps/canvas_app.py +2 -2
  20. api/apps/llm_app.py +2 -2
  21. api/apps/user_app.py +7 -7
  22. api/db/db_models.py +7 -7
  23. api/db/init_data.py +8 -8
  24. api/db/services/dialog_service.py +9 -9
  25. api/db/services/document_service.py +3 -3
  26. api/db/services/file_service.py +3 -3
  27. api/db/services/llm_service.py +9 -9
  28. api/ragflow_server.py +17 -16
  29. api/utils/api_utils.py +3 -3
  30. api/utils/log_utils.py +30 -27
  31. api/validation.py +3 -3
  32. deepdoc/parser/pdf_parser.py +17 -21
  33. deepdoc/parser/resume/entities/corporations.py +2 -2
  34. deepdoc/parser/resume/step_two.py +7 -8
  35. deepdoc/vision/operators.py +2 -2
  36. deepdoc/vision/recognizer.py +2 -2
  37. deepdoc/vision/seeit.py +2 -2
  38. deepdoc/vision/t_recognizer.py +2 -2
  39. deepdoc/vision/table_structure_recognizer.py +1 -1
  40. graphrag/claim_extractor.py +3 -3
  41. graphrag/community_reports_extractor.py +3 -3
  42. graphrag/description_summary.py +0 -8
  43. graphrag/entity_resolution.py +1 -1
  44. graphrag/index.py +3 -3
  45. graphrag/leiden.py +1 -3
  46. graphrag/mind_map_extractor.py +3 -4
  47. intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py +11 -11
  48. rag/app/book.py +2 -2
  49. rag/app/email.py +2 -2
  50. rag/app/laws.py +1 -2
agent/canvas.py CHANGED
@@ -13,13 +13,13 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import json
17
  from abc import ABC
18
  from copy import deepcopy
19
  from functools import partial
20
  from agent.component import component_class
21
  from agent.component.base import ComponentBase
22
- from api.utils.log_utils import logger
23
 
24
  class Canvas(ABC):
25
  """
@@ -187,7 +187,7 @@ class Canvas(ABC):
187
  if cpn.component_name == "Answer":
188
  self.answer.append(c)
189
  else:
190
- logger.debug(f"Canvas.prepare2run: {c}")
191
  cpids = cpn.get_dependent_components()
192
  if any([c not in self.path[-1] for c in cpids]):
193
  continue
@@ -197,7 +197,7 @@ class Canvas(ABC):
197
 
198
  prepare2run(self.components[self.path[-2][-1]]["downstream"])
199
  while 0 <= ran < len(self.path[-1]):
200
- logger.debug(f"Canvas.run: {ran} {self.path}")
201
  cpn_id = self.path[-1][ran]
202
  cpn = self.get_component(cpn_id)
203
  if not cpn["downstream"]: break
@@ -217,7 +217,7 @@ class Canvas(ABC):
217
  self.get_component(p)["obj"].set_exception(e)
218
  prepare2run([p])
219
  break
220
- logger.exception("Canvas.run got exception")
221
  break
222
  continue
223
 
@@ -229,7 +229,7 @@ class Canvas(ABC):
229
  self.get_component(p)["obj"].set_exception(e)
230
  prepare2run([p])
231
  break
232
- logger.exception("Canvas.run got exception")
233
  break
234
 
235
  if self.answer:
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import json
18
  from abc import ABC
19
  from copy import deepcopy
20
  from functools import partial
21
  from agent.component import component_class
22
  from agent.component.base import ComponentBase
 
23
 
24
  class Canvas(ABC):
25
  """
 
187
  if cpn.component_name == "Answer":
188
  self.answer.append(c)
189
  else:
190
+ logging.debug(f"Canvas.prepare2run: {c}")
191
  cpids = cpn.get_dependent_components()
192
  if any([c not in self.path[-1] for c in cpids]):
193
  continue
 
197
 
198
  prepare2run(self.components[self.path[-2][-1]]["downstream"])
199
  while 0 <= ran < len(self.path[-1]):
200
+ logging.debug(f"Canvas.run: {ran} {self.path}")
201
  cpn_id = self.path[-1][ran]
202
  cpn = self.get_component(cpn_id)
203
  if not cpn["downstream"]: break
 
217
  self.get_component(p)["obj"].set_exception(e)
218
  prepare2run([p])
219
  break
220
+ logging.exception("Canvas.run got exception")
221
  break
222
  continue
223
 
 
229
  self.get_component(p)["obj"].set_exception(e)
230
  prepare2run([p])
231
  break
232
+ logging.exception("Canvas.run got exception")
233
  break
234
 
235
  if self.answer:
agent/component/arxiv.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import arxiv
18
  import pandas as pd
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
- from api.utils.log_utils import logger
21
 
22
  class ArXivParam(ComponentParamBase):
23
  """
@@ -64,5 +64,5 @@ class ArXiv(ComponentBase, ABC):
64
  return ArXiv.be_output("")
65
 
66
  df = pd.DataFrame(arxiv_res)
67
- logger.debug(f"df: {str(df)}")
68
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import arxiv
19
  import pandas as pd
20
  from agent.component.base import ComponentBase, ComponentParamBase
 
21
 
22
  class ArXivParam(ComponentParamBase):
23
  """
 
64
  return ArXiv.be_output("")
65
 
66
  df = pd.DataFrame(arxiv_res)
67
+ logging.debug(f"df: {str(df)}")
68
  return df
agent/component/baidu.py CHANGED
@@ -13,12 +13,12 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import pandas as pd
18
  import requests
19
  import re
20
  from agent.component.base import ComponentBase, ComponentParamBase
21
- from api.utils.log_utils import logger
22
 
23
 
24
  class BaiduParam(ComponentParamBase):
@@ -62,6 +62,6 @@ class Baidu(ComponentBase, ABC):
62
  return Baidu.be_output("")
63
 
64
  df = pd.DataFrame(baidu_res)
65
- logger.debug(f"df: {str(df)}")
66
  return df
67
 
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import pandas as pd
19
  import requests
20
  import re
21
  from agent.component.base import ComponentBase, ComponentParamBase
 
22
 
23
 
24
  class BaiduParam(ComponentParamBase):
 
62
  return Baidu.be_output("")
63
 
64
  df = pd.DataFrame(baidu_res)
65
+ logging.debug(f"df: {str(df)}")
66
  return df
67
 
agent/component/base.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import builtins
18
  import json
@@ -23,7 +24,6 @@ from typing import Tuple, Union
23
  import pandas as pd
24
 
25
  from agent import settings
26
- from api.utils.log_utils import logger
27
 
28
 
29
  _FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
@@ -361,13 +361,13 @@ class ComponentParamBase(ABC):
361
 
362
  def _warn_deprecated_param(self, param_name, descr):
363
  if self._deprecated_params_set.get(param_name):
364
- logger.warning(
365
  f"{descr} {param_name} is deprecated and ignored in this version."
366
  )
367
 
368
  def _warn_to_deprecate_param(self, param_name, descr, new_param):
369
  if self._deprecated_params_set.get(param_name):
370
- logger.warning(
371
  f"{descr} {param_name} will be deprecated in future release; "
372
  f"please use {new_param} instead."
373
  )
@@ -403,7 +403,7 @@ class ComponentBase(ABC):
403
  return cpnts
404
 
405
  def run(self, history, **kwargs):
406
- logger.info("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
407
  json.dumps(kwargs, ensure_ascii=False)))
408
  try:
409
  res = self._run(history, **kwargs)
@@ -476,7 +476,7 @@ class ComponentBase(ABC):
476
  reversed_cpnts.extend(self._canvas.path[-2])
477
  reversed_cpnts.extend(self._canvas.path[-1])
478
 
479
- logger.debug(f"{self.component_name} {reversed_cpnts[::-1]}")
480
  for u in reversed_cpnts[::-1]:
481
  if self.get_component_name(u) in ["switch", "concentrator"]: continue
482
  if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import builtins
19
  import json
 
24
  import pandas as pd
25
 
26
  from agent import settings
 
27
 
28
 
29
  _FEEDED_DEPRECATED_PARAMS = "_feeded_deprecated_params"
 
361
 
362
  def _warn_deprecated_param(self, param_name, descr):
363
  if self._deprecated_params_set.get(param_name):
364
+ logging.warning(
365
  f"{descr} {param_name} is deprecated and ignored in this version."
366
  )
367
 
368
  def _warn_to_deprecate_param(self, param_name, descr, new_param):
369
  if self._deprecated_params_set.get(param_name):
370
+ logging.warning(
371
  f"{descr} {param_name} will be deprecated in future release; "
372
  f"please use {new_param} instead."
373
  )
 
403
  return cpnts
404
 
405
  def run(self, history, **kwargs):
406
+ logging.debug("{}, history: {}, kwargs: {}".format(self, json.dumps(history, ensure_ascii=False),
407
  json.dumps(kwargs, ensure_ascii=False)))
408
  try:
409
  res = self._run(history, **kwargs)
 
476
  reversed_cpnts.extend(self._canvas.path[-2])
477
  reversed_cpnts.extend(self._canvas.path[-1])
478
 
479
+ logging.debug(f"{self.component_name} {reversed_cpnts[::-1]}")
480
  for u in reversed_cpnts[::-1]:
481
  if self.get_component_name(u) in ["switch", "concentrator"]: continue
482
  if self.component_name.lower() == "generate" and self.get_component_name(u) == "retrieval":
agent/component/bing.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import requests
18
  import pandas as pd
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
- from api.utils.log_utils import logger
21
 
22
  class BingParam(ComponentParamBase):
23
  """
@@ -80,5 +80,5 @@ class Bing(ComponentBase, ABC):
80
  return Bing.be_output("")
81
 
82
  df = pd.DataFrame(bing_res)
83
- logger.debug(f"df: {str(df)}")
84
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import requests
19
  import pandas as pd
20
  from agent.component.base import ComponentBase, ComponentParamBase
 
21
 
22
  class BingParam(ComponentParamBase):
23
  """
 
80
  return Bing.be_output("")
81
 
82
  df = pd.DataFrame(bing_res)
83
+ logging.debug(f"df: {str(df)}")
84
  return df
agent/component/categorize.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  from api.db import LLMType
18
  from api.db.services.llm_service import LLMBundle
19
  from agent.component import GenerateParam, Generate
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class CategorizeParam(GenerateParam):
@@ -77,7 +77,7 @@ class Categorize(Generate, ABC):
77
  chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
78
  ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}],
79
  self._param.gen_conf())
80
- logger.debug(f"input: {input}, answer: {str(ans)}")
81
  for c in self._param.category_description.keys():
82
  if ans.lower().find(c.lower()) >= 0:
83
  return Categorize.be_output(self._param.category_description[c]["to"])
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  from api.db import LLMType
19
  from api.db.services.llm_service import LLMBundle
20
  from agent.component import GenerateParam, Generate
 
21
 
22
 
23
  class CategorizeParam(GenerateParam):
 
77
  chat_mdl = LLMBundle(self._canvas.get_tenant_id(), LLMType.CHAT, self._param.llm_id)
78
  ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": input}],
79
  self._param.gen_conf())
80
+ logging.debug(f"input: {input}, answer: {str(ans)}")
81
  for c in self._param.category_description.keys():
82
  if ans.lower().find(c.lower()) >= 0:
83
  return Categorize.be_output(self._param.category_description[c]["to"])
agent/component/duckduckgo.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  from duckduckgo_search import DDGS
18
  import pandas as pd
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class DuckDuckGoParam(ComponentParamBase):
@@ -62,5 +62,5 @@ class DuckDuckGo(ComponentBase, ABC):
62
  return DuckDuckGo.be_output("")
63
 
64
  df = pd.DataFrame(duck_res)
65
- logger.debug("df: {df}")
66
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  from duckduckgo_search import DDGS
19
  import pandas as pd
20
  from agent.component.base import ComponentBase, ComponentParamBase
 
21
 
22
 
23
  class DuckDuckGoParam(ComponentParamBase):
 
62
  return DuckDuckGo.be_output("")
63
 
64
  df = pd.DataFrame(duck_res)
65
+ logging.debug("df: {df}")
66
  return df
agent/component/github.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import pandas as pd
18
  import requests
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class GitHubParam(ComponentParamBase):
@@ -57,5 +57,5 @@ class GitHub(ComponentBase, ABC):
57
  return GitHub.be_output("")
58
 
59
  df = pd.DataFrame(github_res)
60
- logger.debug(f"df: {df}")
61
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import pandas as pd
19
  import requests
20
  from agent.component.base import ComponentBase, ComponentParamBase
 
21
 
22
 
23
  class GitHubParam(ComponentParamBase):
 
57
  return GitHub.be_output("")
58
 
59
  df = pd.DataFrame(github_res)
60
+ logging.debug(f"df: {df}")
61
  return df
agent/component/google.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  from serpapi import GoogleSearch
18
  import pandas as pd
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class GoogleParam(ComponentParamBase):
@@ -92,5 +92,5 @@ class Google(ComponentBase, ABC):
92
  return Google.be_output("")
93
 
94
  df = pd.DataFrame(google_res)
95
- logger.debug(f"df: {df}")
96
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  from serpapi import GoogleSearch
19
  import pandas as pd
20
  from agent.component.base import ComponentBase, ComponentParamBase
 
21
 
22
 
23
  class GoogleParam(ComponentParamBase):
 
92
  return Google.be_output("")
93
 
94
  df = pd.DataFrame(google_res)
95
+ logging.debug(f"df: {df}")
96
  return df
agent/component/googlescholar.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import pandas as pd
18
  from agent.component.base import ComponentBase, ComponentParamBase
19
  from scholarly import scholarly
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class GoogleScholarParam(ComponentParamBase):
@@ -59,12 +59,12 @@ class GoogleScholar(ComponentBase, ABC):
59
  'bib'].get('abstract', 'no abstract')})
60
 
61
  except StopIteration or Exception:
62
- logger.exception("GoogleScholar")
63
  break
64
 
65
  if not scholar_res:
66
  return GoogleScholar.be_output("")
67
 
68
  df = pd.DataFrame(scholar_res)
69
- logger.debug(f"df: {df}")
70
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import pandas as pd
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
  from scholarly import scholarly
 
21
 
22
 
23
  class GoogleScholarParam(ComponentParamBase):
 
59
  'bib'].get('abstract', 'no abstract')})
60
 
61
  except StopIteration or Exception:
62
+ logging.exception("GoogleScholar")
63
  break
64
 
65
  if not scholar_res:
66
  return GoogleScholar.be_output("")
67
 
68
  df = pd.DataFrame(scholar_res)
69
+ logging.debug(f"df: {df}")
70
  return df
agent/component/keyword.py CHANGED
@@ -13,12 +13,12 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import re
17
  from abc import ABC
18
  from api.db import LLMType
19
  from api.db.services.llm_service import LLMBundle
20
  from agent.component import GenerateParam, Generate
21
- from api.utils.log_utils import logger
22
 
23
 
24
  class KeywordExtractParam(GenerateParam):
@@ -58,5 +58,5 @@ class KeywordExtract(Generate, ABC):
58
  self._param.gen_conf())
59
 
60
  ans = re.sub(r".*keyword:", "", ans).strip()
61
- logger.info(f"ans: {ans}")
62
  return KeywordExtract.be_output(ans)
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import re
18
  from abc import ABC
19
  from api.db import LLMType
20
  from api.db.services.llm_service import LLMBundle
21
  from agent.component import GenerateParam, Generate
 
22
 
23
 
24
  class KeywordExtractParam(GenerateParam):
 
58
  self._param.gen_conf())
59
 
60
  ans = re.sub(r".*keyword:", "", ans).strip()
61
+ logging.debug(f"ans: {ans}")
62
  return KeywordExtract.be_output(ans)
agent/component/pubmed.py CHANGED
@@ -13,13 +13,13 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  from Bio import Entrez
18
  import re
19
  import pandas as pd
20
  import xml.etree.ElementTree as ET
21
  from agent.component.base import ComponentBase, ComponentParamBase
22
- from api.utils.log_utils import logger
23
 
24
 
25
  class PubMedParam(ComponentParamBase):
@@ -65,5 +65,5 @@ class PubMed(ComponentBase, ABC):
65
  return PubMed.be_output("")
66
 
67
  df = pd.DataFrame(pubmed_res)
68
- logger.debug(f"df: {df}")
69
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  from Bio import Entrez
19
  import re
20
  import pandas as pd
21
  import xml.etree.ElementTree as ET
22
  from agent.component.base import ComponentBase, ComponentParamBase
 
23
 
24
 
25
  class PubMedParam(ComponentParamBase):
 
65
  return PubMed.be_output("")
66
 
67
  df = pd.DataFrame(pubmed_res)
68
+ logging.debug(f"df: {df}")
69
  return df
agent/component/relevant.py CHANGED
@@ -13,12 +13,12 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  from api.db import LLMType
18
  from api.db.services.llm_service import LLMBundle
19
  from agent.component import GenerateParam, Generate
20
  from rag.utils import num_tokens_from_string, encoder
21
- from api.utils.log_utils import logger
22
 
23
 
24
  class RelevantParam(GenerateParam):
@@ -71,7 +71,7 @@ class Relevant(Generate, ABC):
71
  ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": ans}],
72
  self._param.gen_conf())
73
 
74
- logger.info(ans)
75
  if ans.lower().find("yes") >= 0:
76
  return Relevant.be_output(self._param.yes)
77
  if ans.lower().find("no") >= 0:
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  from api.db import LLMType
19
  from api.db.services.llm_service import LLMBundle
20
  from agent.component import GenerateParam, Generate
21
  from rag.utils import num_tokens_from_string, encoder
 
22
 
23
 
24
  class RelevantParam(GenerateParam):
 
71
  ans = chat_mdl.chat(self._param.get_prompt(), [{"role": "user", "content": ans}],
72
  self._param.gen_conf())
73
 
74
+ logging.debug(ans)
75
  if ans.lower().find("yes") >= 0:
76
  return Relevant.be_output(self._param.yes)
77
  if ans.lower().find("no") >= 0:
agent/component/retrieval.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
 
18
  import pandas as pd
@@ -22,7 +23,6 @@ from api.db.services.knowledgebase_service import KnowledgebaseService
22
  from api.db.services.llm_service import LLMBundle
23
  from api.settings import retrievaler
24
  from agent.component.base import ComponentBase, ComponentParamBase
25
- from api.utils.log_utils import logger
26
 
27
 
28
  class RetrievalParam(ComponentParamBase):
@@ -81,7 +81,7 @@ class Retrieval(ComponentBase, ABC):
81
  df = pd.DataFrame(kbinfos["chunks"])
82
  df["content"] = df["content_with_weight"]
83
  del df["content_with_weight"]
84
- logger.debug("{} {}".format(query, df))
85
  return df
86
 
87
 
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
 
19
  import pandas as pd
 
23
  from api.db.services.llm_service import LLMBundle
24
  from api.settings import retrievaler
25
  from agent.component.base import ComponentBase, ComponentParamBase
 
26
 
27
 
28
  class RetrievalParam(ComponentParamBase):
 
81
  df = pd.DataFrame(kbinfos["chunks"])
82
  df["content"] = df["content_with_weight"]
83
  del df["content_with_weight"]
84
+ logging.debug("{} {}".format(query, df))
85
  return df
86
 
87
 
agent/component/rewrite.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  from api.db import LLMType
18
  from api.db.services.llm_service import LLMBundle
19
  from agent.component import GenerateParam, Generate
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class RewriteQuestionParam(GenerateParam):
@@ -105,7 +105,7 @@ class RewriteQuestion(Generate, ABC):
105
  self._canvas.history.pop()
106
  self._canvas.history.append(("user", ans))
107
 
108
- logger.info(ans)
109
  return RewriteQuestion.be_output(ans)
110
 
111
 
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  from api.db import LLMType
19
  from api.db.services.llm_service import LLMBundle
20
  from agent.component import GenerateParam, Generate
 
21
 
22
 
23
  class RewriteQuestionParam(GenerateParam):
 
105
  self._canvas.history.pop()
106
  self._canvas.history.append(("user", ans))
107
 
108
+ logging.debug(ans)
109
  return RewriteQuestion.be_output(ans)
110
 
111
 
agent/component/wikipedia.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import wikipedia
18
  import pandas as pd
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class WikipediaParam(ComponentParamBase):
@@ -63,5 +63,5 @@ class Wikipedia(ComponentBase, ABC):
63
  return Wikipedia.be_output("")
64
 
65
  df = pd.DataFrame(wiki_res)
66
- logger.debug(f"df: {df}")
67
  return df
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import wikipedia
19
  import pandas as pd
20
  from agent.component.base import ComponentBase, ComponentParamBase
 
21
 
22
 
23
  class WikipediaParam(ComponentParamBase):
 
63
  return Wikipedia.be_output("")
64
 
65
  df = pd.DataFrame(wiki_res)
66
+ logging.debug(f"df: {df}")
67
  return df
agent/component/yahoofinance.py CHANGED
@@ -13,11 +13,11 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from abc import ABC
17
  import pandas as pd
18
  from agent.component.base import ComponentBase, ComponentParamBase
19
  import yfinance as yf
20
- from api.utils.log_utils import logger
21
 
22
 
23
  class YahooFinanceParam(ComponentParamBase):
@@ -76,7 +76,7 @@ class YahooFinance(ComponentBase, ABC):
76
  if self._param.news:
77
  yohoo_res.append({"content": "news:\n" + pd.DataFrame(msft.news).to_markdown() + "\n"})
78
  except Exception:
79
- logger.exception("YahooFinance got exception")
80
 
81
  if not yohoo_res:
82
  return YahooFinance.be_output("")
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from abc import ABC
18
  import pandas as pd
19
  from agent.component.base import ComponentBase, ComponentParamBase
20
  import yfinance as yf
 
21
 
22
 
23
  class YahooFinanceParam(ComponentParamBase):
 
76
  if self._param.news:
77
  yohoo_res.append({"content": "news:\n" + pd.DataFrame(msft.news).to_markdown() + "\n"})
78
  except Exception:
79
+ logging.exception("YahooFinance got exception")
80
 
81
  if not yohoo_res:
82
  return YahooFinance.be_output("")
api/apps/__init__.py CHANGED
@@ -15,6 +15,7 @@
15
  #
16
  import os
17
  import sys
 
18
  from importlib.util import module_from_spec, spec_from_file_location
19
  from pathlib import Path
20
  from flask import Blueprint, Flask
@@ -32,7 +33,6 @@ from flask_login import LoginManager
32
  from api.settings import SECRET_KEY
33
  from api.settings import API_VERSION
34
  from api.utils.api_utils import server_error_response
35
- from api.utils.log_utils import logger
36
  from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
37
 
38
  __all__ = ["app"]
@@ -154,7 +154,7 @@ def load_user(web_request):
154
  else:
155
  return None
156
  except Exception:
157
- logger.exception("load_user got exception")
158
  return None
159
  else:
160
  return None
 
15
  #
16
  import os
17
  import sys
18
+ import logging
19
  from importlib.util import module_from_spec, spec_from_file_location
20
  from pathlib import Path
21
  from flask import Blueprint, Flask
 
33
  from api.settings import SECRET_KEY
34
  from api.settings import API_VERSION
35
  from api.utils.api_utils import server_error_response
 
36
  from itsdangerous.url_safe import URLSafeTimedSerializer as Serializer
37
 
38
  __all__ = ["app"]
 
154
  else:
155
  return None
156
  except Exception:
157
+ logging.exception("load_user got exception")
158
  return None
159
  else:
160
  return None
api/apps/canvas_app.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import json
17
  from functools import partial
18
  from flask import request, Response
@@ -23,7 +24,6 @@ from api.utils import get_uuid
23
  from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
24
  from agent.canvas import Canvas
25
  from peewee import MySQLDatabase, PostgresqlDatabase
26
- from api.utils.log_utils import logger
27
 
28
 
29
  @manager.route('/templates', methods=['GET'])
@@ -115,7 +115,7 @@ def run():
115
  pass
116
  canvas.add_user_input(req["message"])
117
  answer = canvas.run(stream=stream)
118
- logger.info(canvas)
119
  except Exception as e:
120
  return server_error_response(e)
121
 
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import json
18
  from functools import partial
19
  from flask import request, Response
 
24
  from api.utils.api_utils import get_json_result, server_error_response, validate_request, get_data_error_result
25
  from agent.canvas import Canvas
26
  from peewee import MySQLDatabase, PostgresqlDatabase
 
27
 
28
 
29
  @manager.route('/templates', methods=['GET'])
 
115
  pass
116
  canvas.add_user_input(req["message"])
117
  answer = canvas.run(stream=stream)
118
+ logging.debug(canvas)
119
  except Exception as e:
120
  return server_error_response(e)
121
 
api/apps/llm_app.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import json
17
 
18
  from flask import request
@@ -25,7 +26,6 @@ from api.db.db_models import TenantLLM
25
  from api.utils.api_utils import get_json_result
26
  from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
27
  import requests
28
- from api.utils.log_utils import logger
29
 
30
 
31
  @manager.route('/factories', methods=['GET'])
@@ -90,7 +90,7 @@ def set_api_key():
90
  if len(arr) == 0 or tc == 0:
91
  raise Exception("Fail")
92
  rerank_passed = True
93
- logger.info(f'passed model rerank {llm.llm_name}')
94
  except Exception as e:
95
  msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
96
  e)
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import json
18
 
19
  from flask import request
 
26
  from api.utils.api_utils import get_json_result
27
  from rag.llm import EmbeddingModel, ChatModel, RerankModel, CvModel, TTSModel
28
  import requests
 
29
 
30
 
31
  @manager.route('/factories', methods=['GET'])
 
90
  if len(arr) == 0 or tc == 0:
91
  raise Exception("Fail")
92
  rerank_passed = True
93
+ logging.debug(f'passed model rerank {llm.llm_name}')
94
  except Exception as e:
95
  msg += f"\nFail to access model({llm.llm_name}) using this api key." + str(
96
  e)
api/apps/user_app.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import json
17
  import re
18
  from datetime import datetime
@@ -54,7 +55,6 @@ from api.settings import (
54
  from api.db.services.user_service import UserService, TenantService, UserTenantService
55
  from api.db.services.file_service import FileService
56
  from api.utils.api_utils import get_json_result, construct_response
57
- from api.utils.log_utils import logger
58
 
59
 
60
  @manager.route("/login", methods=["POST", "GET"])
@@ -177,7 +177,7 @@ def github_callback():
177
  try:
178
  avatar = download_img(user_info["avatar_url"])
179
  except Exception as e:
180
- logger.exception(e)
181
  avatar = ""
182
  users = user_register(
183
  user_id,
@@ -202,7 +202,7 @@ def github_callback():
202
  return redirect("/?auth=%s" % user.get_id())
203
  except Exception as e:
204
  rollback_user_registration(user_id)
205
- logger.exception(e)
206
  return redirect("/?error=%s" % str(e))
207
 
208
  # User has already registered, try to log in
@@ -279,7 +279,7 @@ def feishu_callback():
279
  try:
280
  avatar = download_img(user_info["avatar_url"])
281
  except Exception as e:
282
- logger.exception(e)
283
  avatar = ""
284
  users = user_register(
285
  user_id,
@@ -304,7 +304,7 @@ def feishu_callback():
304
  return redirect("/?auth=%s" % user.get_id())
305
  except Exception as e:
306
  rollback_user_registration(user_id)
307
- logger.exception(e)
308
  return redirect("/?error=%s" % str(e))
309
 
310
  # User has already registered, try to log in
@@ -436,7 +436,7 @@ def setting_user():
436
  UserService.update_by_id(current_user.id, update_dict)
437
  return get_json_result(data=True)
438
  except Exception as e:
439
- logger.exception(e)
440
  return get_json_result(
441
  data=False, message="Update failure!", code=RetCode.EXCEPTION_ERROR
442
  )
@@ -621,7 +621,7 @@ def user_add():
621
  )
622
  except Exception as e:
623
  rollback_user_registration(user_id)
624
- logger.exception(e)
625
  return get_json_result(
626
  data=False,
627
  message=f"User registration failure, error: {str(e)}",
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import json
18
  import re
19
  from datetime import datetime
 
55
  from api.db.services.user_service import UserService, TenantService, UserTenantService
56
  from api.db.services.file_service import FileService
57
  from api.utils.api_utils import get_json_result, construct_response
 
58
 
59
 
60
  @manager.route("/login", methods=["POST", "GET"])
 
177
  try:
178
  avatar = download_img(user_info["avatar_url"])
179
  except Exception as e:
180
+ logging.exception(e)
181
  avatar = ""
182
  users = user_register(
183
  user_id,
 
202
  return redirect("/?auth=%s" % user.get_id())
203
  except Exception as e:
204
  rollback_user_registration(user_id)
205
+ logging.exception(e)
206
  return redirect("/?error=%s" % str(e))
207
 
208
  # User has already registered, try to log in
 
279
  try:
280
  avatar = download_img(user_info["avatar_url"])
281
  except Exception as e:
282
+ logging.exception(e)
283
  avatar = ""
284
  users = user_register(
285
  user_id,
 
304
  return redirect("/?auth=%s" % user.get_id())
305
  except Exception as e:
306
  rollback_user_registration(user_id)
307
+ logging.exception(e)
308
  return redirect("/?error=%s" % str(e))
309
 
310
  # User has already registered, try to log in
 
436
  UserService.update_by_id(current_user.id, update_dict)
437
  return get_json_result(data=True)
438
  except Exception as e:
439
+ logging.exception(e)
440
  return get_json_result(
441
  data=False, message="Update failure!", code=RetCode.EXCEPTION_ERROR
442
  )
 
621
  )
622
  except Exception as e:
623
  rollback_user_registration(user_id)
624
+ logging.exception(e)
625
  return get_json_result(
626
  data=False,
627
  message=f"User registration failure, error: {str(e)}",
api/db/db_models.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import inspect
17
  import os
18
  import sys
@@ -32,7 +33,6 @@ from playhouse.pool import PooledMySQLDatabase, PooledPostgresqlDatabase
32
  from api.db import SerializedType, ParserType
33
  from api.settings import DATABASE, SECRET_KEY, DATABASE_TYPE
34
  from api import utils
35
- from api.utils.log_utils import logger
36
 
37
  def singleton(cls, *args, **kw):
38
  instances = {}
@@ -285,7 +285,7 @@ class BaseDataBase:
285
  database_config = DATABASE.copy()
286
  db_name = database_config.pop("name")
287
  self.database_connection = PooledDatabase[DATABASE_TYPE.upper()].value(db_name, **database_config)
288
- logger.info('init database on cluster mode successfully')
289
 
290
  class PostgresDatabaseLock:
291
  def __init__(self, lock_name, timeout=10, db=None):
@@ -393,7 +393,7 @@ def close_connection():
393
  if DB:
394
  DB.close_stale(age=30)
395
  except Exception as e:
396
- logger.exception(e)
397
 
398
 
399
  class DataBaseModel(BaseModel):
@@ -409,15 +409,15 @@ def init_database_tables(alter_fields=[]):
409
  for name, obj in members:
410
  if obj != DataBaseModel and issubclass(obj, DataBaseModel):
411
  table_objs.append(obj)
412
- logger.info(f"start create table {obj.__name__}")
413
  try:
414
  obj.create_table()
415
- logger.info(f"create table success: {obj.__name__}")
416
  except Exception as e:
417
- logger.exception(e)
418
  create_failed_list.append(obj.__name__)
419
  if create_failed_list:
420
- logger.info(f"create tables failed: {create_failed_list}")
421
  raise Exception(f"create tables failed: {create_failed_list}")
422
  migrate_db()
423
 
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import inspect
18
  import os
19
  import sys
 
33
  from api.db import SerializedType, ParserType
34
  from api.settings import DATABASE, SECRET_KEY, DATABASE_TYPE
35
  from api import utils
 
36
 
37
  def singleton(cls, *args, **kw):
38
  instances = {}
 
285
  database_config = DATABASE.copy()
286
  db_name = database_config.pop("name")
287
  self.database_connection = PooledDatabase[DATABASE_TYPE.upper()].value(db_name, **database_config)
288
+ logging.info('init database on cluster mode successfully')
289
 
290
  class PostgresDatabaseLock:
291
  def __init__(self, lock_name, timeout=10, db=None):
 
393
  if DB:
394
  DB.close_stale(age=30)
395
  except Exception as e:
396
+ logging.exception(e)
397
 
398
 
399
  class DataBaseModel(BaseModel):
 
409
  for name, obj in members:
410
  if obj != DataBaseModel and issubclass(obj, DataBaseModel):
411
  table_objs.append(obj)
412
+ logging.debug(f"start create table {obj.__name__}")
413
  try:
414
  obj.create_table()
415
+ logging.debug(f"create table success: {obj.__name__}")
416
  except Exception as e:
417
+ logging.exception(e)
418
  create_failed_list.append(obj.__name__)
419
  if create_failed_list:
420
+ logging.error(f"create tables failed: {create_failed_list}")
421
  raise Exception(f"create tables failed: {create_failed_list}")
422
  migrate_db()
423
 
api/db/init_data.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import base64
17
  import json
18
  import os
@@ -30,7 +31,6 @@ from api.db.services.llm_service import LLMFactoriesService, LLMService, TenantL
30
  from api.db.services.user_service import TenantService, UserTenantService
31
  from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
32
  from api.utils.file_utils import get_project_base_directory
33
- from api.utils.log_utils import logger
34
 
35
 
36
  def encode_to_base64(input_string):
@@ -70,26 +70,26 @@ def init_superuser():
70
  "api_key": API_KEY, "api_base": LLM_BASE_URL})
71
 
72
  if not UserService.save(**user_info):
73
- logger.info("can't init admin.")
74
  return
75
  TenantService.insert(**tenant)
76
  UserTenantService.insert(**usr_tenant)
77
  TenantLLMService.insert_many(tenant_llm)
78
- logger.info(
79
- "Super user initialized. email: [email protected], password: admin. Changing the password after logining is strongly recomanded.")
80
 
81
  chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
82
  msg = chat_mdl.chat(system="", history=[
83
  {"role": "user", "content": "Hello!"}], gen_conf={})
84
  if msg.find("ERROR: ") == 0:
85
- logger.error(
86
  "'{}' dosen't work. {}".format(
87
  tenant["llm_id"],
88
  msg))
89
  embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
90
  v, c = embd_mdl.encode(["Hello!"])
91
  if c == 0:
92
- logger.error(
93
  "'{}' dosen't work!".format(
94
  tenant["embd_id"]))
95
 
@@ -172,7 +172,7 @@ def add_graph_templates():
172
  except:
173
  CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
174
  except Exception:
175
- logger.exception("Add graph templates error: ")
176
 
177
 
178
  def init_web_data():
@@ -183,7 +183,7 @@ def init_web_data():
183
  # init_superuser()
184
 
185
  add_graph_templates()
186
- logger.info("init web data success:{}".format(time.time() - start_time))
187
 
188
 
189
  if __name__ == '__main__':
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import base64
18
  import json
19
  import os
 
31
  from api.db.services.user_service import TenantService, UserTenantService
32
  from api.settings import CHAT_MDL, EMBEDDING_MDL, ASR_MDL, IMAGE2TEXT_MDL, PARSERS, LLM_FACTORY, API_KEY, LLM_BASE_URL
33
  from api.utils.file_utils import get_project_base_directory
 
34
 
35
 
36
  def encode_to_base64(input_string):
 
70
  "api_key": API_KEY, "api_base": LLM_BASE_URL})
71
 
72
  if not UserService.save(**user_info):
73
+ logging.error("can't init admin.")
74
  return
75
  TenantService.insert(**tenant)
76
  UserTenantService.insert(**usr_tenant)
77
  TenantLLMService.insert_many(tenant_llm)
78
+ logging.info(
79
+ "Super user initialized. email: [email protected], password: admin. Changing the password after login is strongly recommended.")
80
 
81
  chat_mdl = LLMBundle(tenant["id"], LLMType.CHAT, tenant["llm_id"])
82
  msg = chat_mdl.chat(system="", history=[
83
  {"role": "user", "content": "Hello!"}], gen_conf={})
84
  if msg.find("ERROR: ") == 0:
85
+ logging.error(
86
  "'{}' dosen't work. {}".format(
87
  tenant["llm_id"],
88
  msg))
89
  embd_mdl = LLMBundle(tenant["id"], LLMType.EMBEDDING, tenant["embd_id"])
90
  v, c = embd_mdl.encode(["Hello!"])
91
  if c == 0:
92
+ logging.error(
93
  "'{}' dosen't work!".format(
94
  tenant["embd_id"]))
95
 
 
172
  except:
173
  CanvasTemplateService.update_by_id(cnvs["id"], cnvs)
174
  except Exception:
175
+ logging.exception("Add graph templates error: ")
176
 
177
 
178
  def init_web_data():
 
183
  # init_superuser()
184
 
185
  add_graph_templates()
186
+ logging.info("init web data success:{}".format(time.time() - start_time))
187
 
188
 
189
  if __name__ == '__main__':
api/db/services/dialog_service.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import binascii
17
  import os
18
  import json
@@ -31,7 +32,6 @@ from rag.app.resume import forbidden_select_fields4resume
31
  from rag.nlp.search import index_name
32
  from rag.utils import rmSpace, num_tokens_from_string, encoder
33
  from api.utils.file_utils import get_project_base_directory
34
- from api.utils.log_utils import logger
35
 
36
 
37
  class DialogService(CommonService):
@@ -178,7 +178,7 @@ def chat(dialog, messages, stream=True, **kwargs):
178
  tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
179
  # try to use sql if field mapping is good to go
180
  if field_map:
181
- logger.info("Use SQL to retrieval:{}".format(questions[-1]))
182
  ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
183
  if ans:
184
  yield ans
@@ -220,7 +220,7 @@ def chat(dialog, messages, stream=True, **kwargs):
220
  doc_ids=attachments,
221
  top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
222
  knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
223
- logger.info(
224
  "{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
225
  retrieval_tm = timer()
226
 
@@ -292,7 +292,7 @@ def chat(dialog, messages, stream=True, **kwargs):
292
  yield decorate_answer(answer)
293
  else:
294
  answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
295
- logger.info("User: {}|Assistant: {}".format(
296
  msg[-1]["content"], answer))
297
  res = decorate_answer(answer)
298
  res["audio_binary"] = tts(tts_mdl, answer)
@@ -320,7 +320,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
320
  nonlocal sys_prompt, user_promt, question, tried_times
321
  sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
322
  "temperature": 0.06})
323
- logger.info(f"{question} ==> {user_promt} get SQL: {sql}")
324
  sql = re.sub(r"[\r\n]+", " ", sql.lower())
325
  sql = re.sub(r".*select ", "select ", sql.lower())
326
  sql = re.sub(r" +", " ", sql)
@@ -340,7 +340,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
340
  flds.append(k)
341
  sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
342
 
343
- logger.info(f"{question} get SQL(refined): {sql}")
344
  tried_times += 1
345
  return retrievaler.sql_retrieval(sql, format="json"), sql
346
 
@@ -369,9 +369,9 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
369
  question, sql, tbl["error"]
370
  )
371
  tbl, sql = get_table()
372
- logger.info("TRY it again: {}".format(sql))
373
 
374
- logger.info("GET table: {}".format(tbl))
375
  if tbl.get("error") or len(tbl["rows"]) == 0:
376
  return None
377
 
@@ -401,7 +401,7 @@ def use_sql(question, field_map, tenant_id, chat_mdl, quota=True):
401
  rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
402
 
403
  if not docid_idx or not docnm_idx:
404
- logger.warning("SQL missing field: " + sql)
405
  return {
406
  "answer": "\n".join([clmns, line, rows]),
407
  "reference": {"chunks": [], "doc_aggs": []},
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import binascii
18
  import os
19
  import json
 
32
  from rag.nlp.search import index_name
33
  from rag.utils import rmSpace, num_tokens_from_string, encoder
34
  from api.utils.file_utils import get_project_base_directory
 
35
 
36
 
37
  class DialogService(CommonService):
 
178
  tts_mdl = LLMBundle(dialog.tenant_id, LLMType.TTS)
179
  # try to use sql if field mapping is good to go
180
  if field_map:
181
+ logging.debug("Use SQL to retrieval:{}".format(questions[-1]))
182
  ans = use_sql(questions[-1], field_map, dialog.tenant_id, chat_mdl, prompt_config.get("quote", True))
183
  if ans:
184
  yield ans
 
220
  doc_ids=attachments,
221
  top=dialog.top_k, aggs=False, rerank_mdl=rerank_mdl)
222
  knowledges = [ck["content_with_weight"] for ck in kbinfos["chunks"]]
223
+ logging.debug(
224
  "{}->{}".format(" ".join(questions), "\n->".join(knowledges)))
225
  retrieval_tm = timer()
226
 
 
292
  yield decorate_answer(answer)
293
  else:
294
  answer = chat_mdl.chat(prompt, msg[1:], gen_conf)
295
+ logging.debug("User: {}|Assistant: {}".format(
296
  msg[-1]["content"], answer))
297
  res = decorate_answer(answer)
298
  res["audio_binary"] = tts(tts_mdl, answer)
 
320
  nonlocal sys_prompt, user_promt, question, tried_times
321
  sql = chat_mdl.chat(sys_prompt, [{"role": "user", "content": user_promt}], {
322
  "temperature": 0.06})
323
+ logging.debug(f"{question} ==> {user_promt} get SQL: {sql}")
324
  sql = re.sub(r"[\r\n]+", " ", sql.lower())
325
  sql = re.sub(r".*select ", "select ", sql.lower())
326
  sql = re.sub(r" +", " ", sql)
 
340
  flds.append(k)
341
  sql = "select doc_id,docnm_kwd," + ",".join(flds) + sql[8:]
342
 
343
+ logging.debug(f"{question} get SQL(refined): {sql}")
344
  tried_times += 1
345
  return retrievaler.sql_retrieval(sql, format="json"), sql
346
 
 
369
  question, sql, tbl["error"]
370
  )
371
  tbl, sql = get_table()
372
+ logging.debug("TRY it again: {}".format(sql))
373
 
374
+ logging.debug("GET table: {}".format(tbl))
375
  if tbl.get("error") or len(tbl["rows"]) == 0:
376
  return None
377
 
 
401
  rows = re.sub(r"T[0-9]{2}:[0-9]{2}:[0-9]{2}(\.[0-9]+Z)?\|", "|", rows)
402
 
403
  if not docid_idx or not docnm_idx:
404
+ logging.warning("SQL missing field: " + sql)
405
  return {
406
  "answer": "\n".join([clmns, line, rows]),
407
  "reference": {"chunks": [], "doc_aggs": []},
api/db/services/document_service.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import hashlib
17
  import json
18
  import random
@@ -39,7 +40,6 @@ from api.db.services.common_service import CommonService
39
  from api.db.services.knowledgebase_service import KnowledgebaseService
40
  from api.db import StatusEnum
41
  from rag.utils.redis_conn import REDIS_CONN
42
- from api.utils.log_utils import logger
43
 
44
 
45
  class DocumentService(CommonService):
@@ -387,7 +387,7 @@ class DocumentService(CommonService):
387
  cls.update_by_id(d["id"], info)
388
  except Exception as e:
389
  if str(e).find("'0'") < 0:
390
- logger.exception("fetch task exception")
391
 
392
  @classmethod
393
  @DB.connection_context()
@@ -544,7 +544,7 @@ def doc_upload_and_parse(conversation_id, file_objs, user_id):
544
  "knowledge_graph_kwd": "mind_map"
545
  })
546
  except Exception as e:
547
- logger.exception("Mind map generation error")
548
 
549
  vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
550
  assert len(cks) == len(vects)
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import hashlib
18
  import json
19
  import random
 
40
  from api.db.services.knowledgebase_service import KnowledgebaseService
41
  from api.db import StatusEnum
42
  from rag.utils.redis_conn import REDIS_CONN
 
43
 
44
 
45
  class DocumentService(CommonService):
 
387
  cls.update_by_id(d["id"], info)
388
  except Exception as e:
389
  if str(e).find("'0'") < 0:
390
+ logging.exception("fetch task exception")
391
 
392
  @classmethod
393
  @DB.connection_context()
 
544
  "knowledge_graph_kwd": "mind_map"
545
  })
546
  except Exception as e:
547
+ logging.exception("Mind map generation error")
548
 
549
  vects = embedding(doc_id, [c["content_with_weight"] for c in cks])
550
  assert len(cks) == len(vects)
api/db/services/file_service.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import re
17
  import os
18
  from concurrent.futures import ThreadPoolExecutor
@@ -30,7 +31,6 @@ from api.db.services.file2document_service import File2DocumentService
30
  from api.utils import get_uuid
31
  from api.utils.file_utils import filename_type, thumbnail_img
32
  from rag.utils.storage_factory import STORAGE_IMPL
33
- from api.utils.log_utils import logger
34
 
35
 
36
  class FileService(CommonService):
@@ -276,7 +276,7 @@ class FileService(CommonService):
276
  return cls.model.delete().where((cls.model.tenant_id == user_id)
277
  & (cls.model.id == folder_id)).execute(),
278
  except Exception:
279
- logger.exception("delete_folder_by_pf_id")
280
  raise RuntimeError("Database error (File retrieval)!")
281
 
282
  @classmethod
@@ -325,7 +325,7 @@ class FileService(CommonService):
325
  try:
326
  cls.filter_update((cls.model.id << file_ids, ), { 'parent_id': folder_id })
327
  except Exception:
328
- logger.exception("move_file")
329
  raise RuntimeError("Database error (File move)!")
330
 
331
  @classmethod
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import re
18
  import os
19
  from concurrent.futures import ThreadPoolExecutor
 
31
  from api.utils import get_uuid
32
  from api.utils.file_utils import filename_type, thumbnail_img
33
  from rag.utils.storage_factory import STORAGE_IMPL
 
34
 
35
 
36
  class FileService(CommonService):
 
276
  return cls.model.delete().where((cls.model.tenant_id == user_id)
277
  & (cls.model.id == folder_id)).execute(),
278
  except Exception:
279
+ logging.exception("delete_folder_by_pf_id")
280
  raise RuntimeError("Database error (File retrieval)!")
281
 
282
  @classmethod
 
325
  try:
326
  cls.filter_update((cls.model.id << file_ids, ), { 'parent_id': folder_id })
327
  except Exception:
328
+ logging.exception("move_file")
329
  raise RuntimeError("Database error (File move)!")
330
 
331
  @classmethod
api/db/services/llm_service.py CHANGED
@@ -13,13 +13,13 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  from api.db.services.user_service import TenantService
17
  from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
18
  from api.db import LLMType
19
  from api.db.db_models import DB
20
  from api.db.db_models import LLMFactories, LLM, TenantLLM
21
  from api.db.services.common_service import CommonService
22
- from api.utils.log_utils import logger
23
 
24
 
25
  class LLMFactoriesService(CommonService):
@@ -209,7 +209,7 @@ class LLMBundle(object):
209
  emd, used_tokens = self.mdl.encode(texts, batch_size)
210
  if not TenantLLMService.increase_usage(
211
  self.tenant_id, self.llm_type, used_tokens):
212
- logger.error(
213
  "LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
214
  return emd, used_tokens
215
 
@@ -217,7 +217,7 @@ class LLMBundle(object):
217
  emd, used_tokens = self.mdl.encode_queries(query)
218
  if not TenantLLMService.increase_usage(
219
  self.tenant_id, self.llm_type, used_tokens):
220
- logger.error(
221
  "LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
222
  return emd, used_tokens
223
 
@@ -225,7 +225,7 @@ class LLMBundle(object):
225
  sim, used_tokens = self.mdl.similarity(query, texts)
226
  if not TenantLLMService.increase_usage(
227
  self.tenant_id, self.llm_type, used_tokens):
228
- logger.error(
229
  "LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
230
  return sim, used_tokens
231
 
@@ -233,7 +233,7 @@ class LLMBundle(object):
233
  txt, used_tokens = self.mdl.describe(image, max_tokens)
234
  if not TenantLLMService.increase_usage(
235
  self.tenant_id, self.llm_type, used_tokens):
236
- logger.error(
237
  "LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
238
  return txt
239
 
@@ -241,7 +241,7 @@ class LLMBundle(object):
241
  txt, used_tokens = self.mdl.transcription(audio)
242
  if not TenantLLMService.increase_usage(
243
  self.tenant_id, self.llm_type, used_tokens):
244
- logger.error(
245
  "LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
246
  return txt
247
 
@@ -250,7 +250,7 @@ class LLMBundle(object):
250
  if isinstance(chunk,int):
251
  if not TenantLLMService.increase_usage(
252
  self.tenant_id, self.llm_type, chunk, self.llm_name):
253
- logger.error(
254
  "LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
255
  return
256
  yield chunk
@@ -259,7 +259,7 @@ class LLMBundle(object):
259
  txt, used_tokens = self.mdl.chat(system, history, gen_conf)
260
  if isinstance(txt, int) and not TenantLLMService.increase_usage(
261
  self.tenant_id, self.llm_type, used_tokens, self.llm_name):
262
- logger.error(
263
  "LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
264
  return txt
265
 
@@ -268,7 +268,7 @@ class LLMBundle(object):
268
  if isinstance(txt, int):
269
  if not TenantLLMService.increase_usage(
270
  self.tenant_id, self.llm_type, txt, self.llm_name):
271
- logger.error(
272
  "LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
273
  return
274
  yield txt
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  from api.db.services.user_service import TenantService
18
  from rag.llm import EmbeddingModel, CvModel, ChatModel, RerankModel, Seq2txtModel, TTSModel
19
  from api.db import LLMType
20
  from api.db.db_models import DB
21
  from api.db.db_models import LLMFactories, LLM, TenantLLM
22
  from api.db.services.common_service import CommonService
 
23
 
24
 
25
  class LLMFactoriesService(CommonService):
 
209
  emd, used_tokens = self.mdl.encode(texts, batch_size)
210
  if not TenantLLMService.increase_usage(
211
  self.tenant_id, self.llm_type, used_tokens):
212
+ logging.error(
213
  "LLMBundle.encode can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
214
  return emd, used_tokens
215
 
 
217
  emd, used_tokens = self.mdl.encode_queries(query)
218
  if not TenantLLMService.increase_usage(
219
  self.tenant_id, self.llm_type, used_tokens):
220
+ logging.error(
221
  "LLMBundle.encode_queries can't update token usage for {}/EMBEDDING used_tokens: {}".format(self.tenant_id, used_tokens))
222
  return emd, used_tokens
223
 
 
225
  sim, used_tokens = self.mdl.similarity(query, texts)
226
  if not TenantLLMService.increase_usage(
227
  self.tenant_id, self.llm_type, used_tokens):
228
+ logging.error(
229
  "LLMBundle.similarity can't update token usage for {}/RERANK used_tokens: {}".format(self.tenant_id, used_tokens))
230
  return sim, used_tokens
231
 
 
233
  txt, used_tokens = self.mdl.describe(image, max_tokens)
234
  if not TenantLLMService.increase_usage(
235
  self.tenant_id, self.llm_type, used_tokens):
236
+ logging.error(
237
  "LLMBundle.describe can't update token usage for {}/IMAGE2TEXT used_tokens: {}".format(self.tenant_id, used_tokens))
238
  return txt
239
 
 
241
  txt, used_tokens = self.mdl.transcription(audio)
242
  if not TenantLLMService.increase_usage(
243
  self.tenant_id, self.llm_type, used_tokens):
244
+ logging.error(
245
  "LLMBundle.transcription can't update token usage for {}/SEQUENCE2TXT used_tokens: {}".format(self.tenant_id, used_tokens))
246
  return txt
247
 
 
250
  if isinstance(chunk,int):
251
  if not TenantLLMService.increase_usage(
252
  self.tenant_id, self.llm_type, chunk, self.llm_name):
253
+ logging.error(
254
  "LLMBundle.tts can't update token usage for {}/TTS".format(self.tenant_id))
255
  return
256
  yield chunk
 
259
  txt, used_tokens = self.mdl.chat(system, history, gen_conf)
260
  if isinstance(txt, int) and not TenantLLMService.increase_usage(
261
  self.tenant_id, self.llm_type, used_tokens, self.llm_name):
262
+ logging.error(
263
  "LLMBundle.chat can't update token usage for {}/CHAT llm_name: {}, used_tokens: {}".format(self.tenant_id, self.llm_name, used_tokens))
264
  return txt
265
 
 
268
  if isinstance(txt, int):
269
  if not TenantLLMService.increase_usage(
270
  self.tenant_id, self.llm_type, txt, self.llm_name):
271
+ logging.error(
272
  "LLMBundle.chat_streamly can't update token usage for {}/CHAT llm_name: {}, content: {}".format(self.tenant_id, self.llm_name, txt))
273
  return
274
  yield txt
api/ragflow_server.py CHANGED
@@ -15,6 +15,17 @@
15
  #
16
 
17
  import logging
 
 
 
 
 
 
 
 
 
 
 
18
  import os
19
  import signal
20
  import sys
@@ -22,7 +33,6 @@ import time
22
  import traceback
23
  from concurrent.futures import ThreadPoolExecutor
24
 
25
- import validation
26
  from werkzeug.serving import run_simple
27
  from api.apps import app
28
  from api.db.runtime_config import RuntimeConfig
@@ -31,7 +41,6 @@ from api.settings import (
31
  HOST, HTTP_PORT
32
  )
33
  from api import utils
34
- from api.utils.log_utils import logger
35
 
36
  from api.db.db_models import init_database_tables as init_web_db
37
  from api.db.init_data import init_web_data
@@ -44,11 +53,11 @@ def update_progress():
44
  try:
45
  DocumentService.update_progress()
46
  except Exception:
47
- logger.exception("update_progress exception")
48
 
49
 
50
  if __name__ == '__main__':
51
- logger.info(r"""
52
  ____ ___ ______ ______ __
53
  / __ \ / | / ____// ____// /____ _ __
54
  / /_/ // /| | / / __ / /_ / // __ \| | /| / /
@@ -56,10 +65,10 @@ if __name__ == '__main__':
56
  /_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
57
 
58
  """)
59
- logger.info(
60
  f'RAGFlow version: {RAGFLOW_VERSION_INFO}'
61
  )
62
- logger.info(
63
  f'project base: {utils.file_utils.get_project_base_directory()}'
64
  )
65
 
@@ -83,26 +92,18 @@ if __name__ == '__main__':
83
 
84
  RuntimeConfig.DEBUG = args.debug
85
  if RuntimeConfig.DEBUG:
86
- logger.info("run on debug mode")
87
 
88
  RuntimeConfig.init_env()
89
  RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)
90
 
91
- peewee_logger = logging.getLogger("peewee")
92
- peewee_logger.propagate = False
93
- # rag_arch.common.log.ROpenHandler
94
- peewee_logger.addHandler(logger.handlers[0])
95
- peewee_logger.setLevel(logger.handlers[0].level)
96
 
97
  thr = ThreadPoolExecutor(max_workers=1)
98
  thr.submit(update_progress)
99
 
100
  # start http server
101
  try:
102
- logger.info("RAG Flow http server start...")
103
- werkzeug_logger = logging.getLogger("werkzeug")
104
- for h in logger.handlers:
105
- werkzeug_logger.addHandler(h)
106
  run_simple(
107
  hostname=HOST,
108
  port=HTTP_PORT,
 
15
  #
16
 
17
  import logging
18
+ import inspect
19
+ from api.utils.log_utils import initRootLogger
20
+ initRootLogger(inspect.getfile(inspect.currentframe()))
21
+ for module in ["pdfminer"]:
22
+ module_logger = logging.getLogger(module)
23
+ module_logger.setLevel(logging.WARNING)
24
+ for module in ["peewee"]:
25
+ module_logger = logging.getLogger(module)
26
+ module_logger.handlers.clear()
27
+ module_logger.propagate = True
28
+
29
  import os
30
  import signal
31
  import sys
 
33
  import traceback
34
  from concurrent.futures import ThreadPoolExecutor
35
 
 
36
  from werkzeug.serving import run_simple
37
  from api.apps import app
38
  from api.db.runtime_config import RuntimeConfig
 
41
  HOST, HTTP_PORT
42
  )
43
  from api import utils
 
44
 
45
  from api.db.db_models import init_database_tables as init_web_db
46
  from api.db.init_data import init_web_data
 
53
  try:
54
  DocumentService.update_progress()
55
  except Exception:
56
+ logging.exception("update_progress exception")
57
 
58
 
59
  if __name__ == '__main__':
60
+ logging.info(r"""
61
  ____ ___ ______ ______ __
62
  / __ \ / | / ____// ____// /____ _ __
63
  / /_/ // /| | / / __ / /_ / // __ \| | /| / /
 
65
  /_/ |_|/_/ |_|\____//_/ /_/ \____/ |__/|__/
66
 
67
  """)
68
+ logging.info(
69
  f'RAGFlow version: {RAGFLOW_VERSION_INFO}'
70
  )
71
+ logging.info(
72
  f'project base: {utils.file_utils.get_project_base_directory()}'
73
  )
74
 
 
92
 
93
  RuntimeConfig.DEBUG = args.debug
94
  if RuntimeConfig.DEBUG:
95
+ logging.info("run on debug mode")
96
 
97
  RuntimeConfig.init_env()
98
  RuntimeConfig.init_config(JOB_SERVER_HOST=HOST, HTTP_PORT=HTTP_PORT)
99
 
 
 
 
 
 
100
 
101
  thr = ThreadPoolExecutor(max_workers=1)
102
  thr.submit(update_progress)
103
 
104
  # start http server
105
  try:
106
+ logging.info("RAG Flow http server start...")
 
 
 
107
  run_simple(
108
  hostname=HOST,
109
  port=HTTP_PORT,
api/utils/api_utils.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import functools
17
  import json
18
  import random
@@ -40,7 +41,6 @@ from api.settings import (
40
  from api.settings import RetCode
41
  from api.utils import CustomJSONEncoder, get_uuid
42
  from api.utils import json_dumps
43
- from api.utils.log_utils import logger
44
 
45
  requests.models.complexjson.dumps = functools.partial(
46
  json.dumps, cls=CustomJSONEncoder)
@@ -118,7 +118,7 @@ def get_data_error_result(code=RetCode.DATA_ERROR,
118
 
119
 
120
  def server_error_response(e):
121
- logger.exception(e)
122
  try:
123
  if e.code == 401:
124
  return get_json_result(code=401, message=repr(e))
@@ -259,7 +259,7 @@ def construct_json_result(code=RetCode.SUCCESS, message='success', data=None):
259
 
260
 
261
  def construct_error_response(e):
262
- logger.exception(e)
263
  try:
264
  if e.code == 401:
265
  return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import functools
18
  import json
19
  import random
 
41
  from api.settings import RetCode
42
  from api.utils import CustomJSONEncoder, get_uuid
43
  from api.utils import json_dumps
 
44
 
45
  requests.models.complexjson.dumps = functools.partial(
46
  json.dumps, cls=CustomJSONEncoder)
 
118
 
119
 
120
  def server_error_response(e):
121
+ logging.exception(e)
122
  try:
123
  if e.code == 401:
124
  return get_json_result(code=401, message=repr(e))
 
259
 
260
 
261
  def construct_error_response(e):
262
+ logging.exception(e)
263
  try:
264
  if e.code == 401:
265
  return construct_json_result(code=RetCode.UNAUTHORIZED, message=repr(e))
api/utils/log_utils.py CHANGED
@@ -14,38 +14,41 @@
14
  # limitations under the License.
15
  #
16
  import os
 
17
  import logging
18
  from logging.handlers import RotatingFileHandler
19
 
20
- from api.utils.file_utils import get_project_base_directory
21
-
22
- LOG_LEVEL = logging.INFO
23
- LOG_FILE = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"ragflow_{os.getpid()}.log"))
24
- LOG_FORMAT = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"
25
- logger = None
26
-
27
- def getLogger():
28
- global logger
29
- if logger is not None:
30
- return logger
31
-
32
- print(f"log file path: {LOG_FILE}")
33
- os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
34
- logger = logging.getLogger("ragflow")
35
- logger.setLevel(LOG_LEVEL)
36
-
37
- handler1 = RotatingFileHandler(LOG_FILE, maxBytes=10*1024*1024, backupCount=5)
38
- handler1.setLevel(LOG_LEVEL)
39
- formatter1 = logging.Formatter(LOG_FORMAT)
40
- handler1.setFormatter(formatter1)
 
 
 
 
41
  logger.addHandler(handler1)
42
 
43
  handler2 = logging.StreamHandler()
44
- handler2.setLevel(LOG_LEVEL)
45
- formatter2 = logging.Formatter(LOG_FORMAT)
46
- handler2.setFormatter(formatter2)
47
  logger.addHandler(handler2)
48
 
49
- return logger
50
-
51
- logger = getLogger()
 
14
  # limitations under the License.
15
  #
16
  import os
17
+ import os.path
18
  import logging
19
  from logging.handlers import RotatingFileHandler
20
 
21
+ def get_project_base_directory():
22
+ PROJECT_BASE = os.path.abspath(
23
+ os.path.join(
24
+ os.path.dirname(os.path.realpath(__file__)),
25
+ os.pardir,
26
+ os.pardir,
27
+ )
28
+ )
29
+ return PROJECT_BASE
30
+
31
+ def initRootLogger(script_path: str, log_level: int = logging.INFO, log_format: str = "%(asctime)-15s %(levelname)-8s %(process)d %(message)s"):
32
+ logger = logging.getLogger()
33
+ if logger.hasHandlers():
34
+ return
35
+
36
+ script_name = os.path.basename(script_path)
37
+ log_path = os.path.abspath(os.path.join(get_project_base_directory(), "logs", f"{os.path.splitext(script_name)[0]}.log"))
38
+
39
+ os.makedirs(os.path.dirname(log_path), exist_ok=True)
40
+ logger.setLevel(log_level)
41
+ formatter = logging.Formatter(log_format)
42
+
43
+ handler1 = RotatingFileHandler(log_path, maxBytes=10*1024*1024, backupCount=5)
44
+ handler1.setLevel(log_level)
45
+ handler1.setFormatter(formatter)
46
  logger.addHandler(handler1)
47
 
48
  handler2 = logging.StreamHandler()
49
+ handler2.setLevel(log_level)
50
+ handler2.setFormatter(formatter)
 
51
  logger.addHandler(handler2)
52
 
53
+ msg = f"{script_name} log path: {log_path}"
54
+ logger.info(msg)
 
api/validation.py CHANGED
@@ -14,20 +14,20 @@
14
  # limitations under the License.
15
  #
16
 
 
17
  import sys
18
- from api.utils.log_utils import logger
19
 
20
 
21
  def python_version_validation():
22
  # Check python version
23
  required_python_version = (3, 10)
24
  if sys.version_info < required_python_version:
25
- logger.info(
26
  f"Required Python: >= {required_python_version[0]}.{required_python_version[1]}. Current Python version: {sys.version_info[0]}.{sys.version_info[1]}."
27
  )
28
  sys.exit(1)
29
  else:
30
- logger.info(f"Python version: {sys.version_info[0]}.{sys.version_info[1]}")
31
 
32
 
33
  python_version_validation()
 
14
  # limitations under the License.
15
  #
16
 
17
+ import logging
18
  import sys
 
19
 
20
 
21
  def python_version_validation():
22
  # Check python version
23
  required_python_version = (3, 10)
24
  if sys.version_info < required_python_version:
25
+ logging.info(
26
  f"Required Python: >= {required_python_version[0]}.{required_python_version[1]}. Current Python version: {sys.version_info[0]}.{sys.version_info[1]}."
27
  )
28
  sys.exit(1)
29
  else:
30
+ logging.info(f"Python version: {sys.version_info[0]}.{sys.version_info[1]}")
31
 
32
 
33
  python_version_validation()
deepdoc/parser/pdf_parser.py CHANGED
@@ -11,6 +11,7 @@
11
  # limitations under the License.
12
  #
13
 
 
14
  import os
15
  import random
16
 
@@ -18,7 +19,6 @@ import xgboost as xgb
18
  from io import BytesIO
19
  import re
20
  import pdfplumber
21
- import logging
22
  from PIL import Image
23
  import numpy as np
24
  from timeit import default_timer as timer
@@ -26,15 +26,11 @@ from pypdf import PdfReader as pdf2_read
26
 
27
  from api.settings import LIGHTEN
28
  from api.utils.file_utils import get_project_base_directory
29
- from api.utils.log_utils import logger
30
  from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
31
  from rag.nlp import rag_tokenizer
32
  from copy import deepcopy
33
  from huggingface_hub import snapshot_download
34
 
35
- logging.getLogger("pdfminer").setLevel(logging.WARNING)
36
-
37
-
38
  class RAGFlowPdfParser:
39
  def __init__(self):
40
  self.ocr = OCR()
@@ -51,7 +47,7 @@ class RAGFlowPdfParser:
51
  if torch.cuda.is_available():
52
  self.updown_cnt_mdl.set_param({"device": "cuda"})
53
  except Exception:
54
- logger.exception("RAGFlowPdfParser __init__")
55
  try:
56
  model_dir = os.path.join(
57
  get_project_base_directory(),
@@ -188,7 +184,7 @@ class RAGFlowPdfParser:
188
  return True
189
 
190
  def _table_transformer_job(self, ZM):
191
- logger.info("Table processing...")
192
  imgs, pos = [], []
193
  tbcnt = [0]
194
  MARGIN = 10
@@ -426,7 +422,7 @@ class RAGFlowPdfParser:
426
  detach_feats = [b["x1"] < b_["x0"],
427
  b["x0"] > b_["x1"]]
428
  if (any(feats) and not any(concatting_feats)) or any(detach_feats):
429
- logger.info("{} {} {} {}".format(
430
  b["text"],
431
  b_["text"],
432
  any(feats),
@@ -727,14 +723,14 @@ class RAGFlowPdfParser:
727
  # continue
728
  if tv < fv and tk:
729
  tables[tk].insert(0, c)
730
- logger.debug(
731
  "TABLE:" +
732
  self.boxes[i]["text"] +
733
  "; Cap: " +
734
  tk)
735
  elif fk:
736
  figures[fk].insert(0, c)
737
- logger.debug(
738
  "FIGURE:" +
739
  self.boxes[i]["text"] +
740
  "; Cap: " +
@@ -761,7 +757,7 @@ class RAGFlowPdfParser:
761
  if ii is not None:
762
  b = louts[ii]
763
  else:
764
- logger.warn(
765
  f"Missing layout match: {pn + 1},%s" %
766
  (bxs[0].get(
767
  "layoutno", "")))
@@ -919,7 +915,7 @@ class RAGFlowPdfParser:
919
  if usefull(boxes[0]):
920
  dfs(boxes[0], 0)
921
  else:
922
- logger.debug("WASTE: " + boxes[0]["text"])
923
  except Exception:
924
  pass
925
  boxes.pop(0)
@@ -928,7 +924,7 @@ class RAGFlowPdfParser:
928
  res.append(
929
  "\n".join([c["text"] + self._line_tag(c, ZM) for c in lines]))
930
  else:
931
- logger.debug("REMOVED: " +
932
  "<<".join([c["text"] for c in lines]))
933
 
934
  return "\n\n".join(res)
@@ -940,7 +936,7 @@ class RAGFlowPdfParser:
940
  fnm) if not binary else pdfplumber.open(BytesIO(binary))
941
  return len(pdf.pages)
942
  except Exception:
943
- logger.exception("total_page_number")
944
 
945
  def __images__(self, fnm, zoomin=3, page_from=0,
946
  page_to=299, callback=None):
@@ -964,7 +960,7 @@ class RAGFlowPdfParser:
964
  self.pdf.pages[page_from:page_to]]
965
  self.total_page = len(self.pdf.pages)
966
  except Exception:
967
- logger.exception("RAGFlowPdfParser __images__")
968
 
969
  self.outlines = []
970
  try:
@@ -980,11 +976,11 @@ class RAGFlowPdfParser:
980
 
981
  dfs(outlines, 0)
982
  except Exception as e:
983
- logger.warning(f"Outlines exception: {e}")
984
  if not self.outlines:
985
- logger.warning("Miss outlines")
986
 
987
- logger.info("Images converted.")
988
  self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
989
  random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
990
  range(len(self.page_chars))]
@@ -1024,7 +1020,7 @@ class RAGFlowPdfParser:
1024
  self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}",
1025
  "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))
1026
 
1027
- logger.info("Is it English:", self.is_english)
1028
 
1029
  self.page_cum_height = np.cumsum(self.page_cum_height)
1030
  assert len(self.page_cum_height) == len(self.page_images) + 1
@@ -1164,9 +1160,9 @@ class PlainParser(object):
1164
 
1165
  dfs(outlines, 0)
1166
  except Exception:
1167
- logger.exception("Outlines exception")
1168
  if not self.outlines:
1169
- logger.warning("Miss outlines")
1170
 
1171
  return [(l, "") for l in lines], []
1172
 
 
11
  # limitations under the License.
12
  #
13
 
14
+ import logging
15
  import os
16
  import random
17
 
 
19
  from io import BytesIO
20
  import re
21
  import pdfplumber
 
22
  from PIL import Image
23
  import numpy as np
24
  from timeit import default_timer as timer
 
26
 
27
  from api.settings import LIGHTEN
28
  from api.utils.file_utils import get_project_base_directory
 
29
  from deepdoc.vision import OCR, Recognizer, LayoutRecognizer, TableStructureRecognizer
30
  from rag.nlp import rag_tokenizer
31
  from copy import deepcopy
32
  from huggingface_hub import snapshot_download
33
 
 
 
 
34
  class RAGFlowPdfParser:
35
  def __init__(self):
36
  self.ocr = OCR()
 
47
  if torch.cuda.is_available():
48
  self.updown_cnt_mdl.set_param({"device": "cuda"})
49
  except Exception:
50
+ logging.exception("RAGFlowPdfParser __init__")
51
  try:
52
  model_dir = os.path.join(
53
  get_project_base_directory(),
 
184
  return True
185
 
186
  def _table_transformer_job(self, ZM):
187
+ logging.debug("Table processing...")
188
  imgs, pos = [], []
189
  tbcnt = [0]
190
  MARGIN = 10
 
422
  detach_feats = [b["x1"] < b_["x0"],
423
  b["x0"] > b_["x1"]]
424
  if (any(feats) and not any(concatting_feats)) or any(detach_feats):
425
+ logging.debug("{} {} {} {}".format(
426
  b["text"],
427
  b_["text"],
428
  any(feats),
 
723
  # continue
724
  if tv < fv and tk:
725
  tables[tk].insert(0, c)
726
+ logging.debug(
727
  "TABLE:" +
728
  self.boxes[i]["text"] +
729
  "; Cap: " +
730
  tk)
731
  elif fk:
732
  figures[fk].insert(0, c)
733
+ logging.debug(
734
  "FIGURE:" +
735
  self.boxes[i]["text"] +
736
  "; Cap: " +
 
757
  if ii is not None:
758
  b = louts[ii]
759
  else:
760
+ logging.warn(
761
  f"Missing layout match: {pn + 1},%s" %
762
  (bxs[0].get(
763
  "layoutno", "")))
 
915
  if usefull(boxes[0]):
916
  dfs(boxes[0], 0)
917
  else:
918
+ logging.debug("WASTE: " + boxes[0]["text"])
919
  except Exception:
920
  pass
921
  boxes.pop(0)
 
924
  res.append(
925
  "\n".join([c["text"] + self._line_tag(c, ZM) for c in lines]))
926
  else:
927
+ logging.debug("REMOVED: " +
928
  "<<".join([c["text"] for c in lines]))
929
 
930
  return "\n\n".join(res)
 
936
  fnm) if not binary else pdfplumber.open(BytesIO(binary))
937
  return len(pdf.pages)
938
  except Exception:
939
+ logging.exception("total_page_number")
940
 
941
  def __images__(self, fnm, zoomin=3, page_from=0,
942
  page_to=299, callback=None):
 
960
  self.pdf.pages[page_from:page_to]]
961
  self.total_page = len(self.pdf.pages)
962
  except Exception:
963
+ logging.exception("RAGFlowPdfParser __images__")
964
 
965
  self.outlines = []
966
  try:
 
976
 
977
  dfs(outlines, 0)
978
  except Exception as e:
979
+ logging.warning(f"Outlines exception: {e}")
980
  if not self.outlines:
981
+ logging.warning("Miss outlines")
982
 
983
+ logging.debug("Images converted.")
984
  self.is_english = [re.search(r"[a-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}", "".join(
985
  random.choices([c["text"] for c in self.page_chars[i]], k=min(100, len(self.page_chars[i]))))) for i in
986
  range(len(self.page_chars))]
 
1020
  self.is_english = re.search(r"[\na-zA-Z0-9,/¸;:'\[\]\(\)!@#$%^&*\"?<>._-]{30,}",
1021
  "".join([b["text"] for b in random.choices(bxes, k=min(30, len(bxes)))]))
1022
 
1023
+ logging.debug("Is it English:", self.is_english)
1024
 
1025
  self.page_cum_height = np.cumsum(self.page_cum_height)
1026
  assert len(self.page_cum_height) == len(self.page_images) + 1
 
1160
 
1161
  dfs(outlines, 0)
1162
  except Exception:
1163
+ logging.exception("Outlines exception")
1164
  if not self.outlines:
1165
+ logging.warning("Miss outlines")
1166
 
1167
  return [(l, "") for l in lines], []
1168
 
deepdoc/parser/resume/entities/corporations.py CHANGED
@@ -11,13 +11,13 @@
11
  # limitations under the License.
12
  #
13
 
 
14
  import re
15
  import json
16
  import os
17
  import pandas as pd
18
  from rag.nlp import rag_tokenizer
19
  from . import regions
20
- from api.utils.log_utils import logger
21
 
22
 
23
  current_file_path = os.path.dirname(os.path.abspath(__file__))
@@ -71,7 +71,7 @@ GOOD_CORP = set([corpNorm(rmNoise(c), False) for c in GOOD_CORP])
71
  for c,v in CORP_TAG.items():
72
  cc = corpNorm(rmNoise(c), False)
73
  if not cc:
74
- logger.info(c)
75
  CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
76
 
77
  def is_good(nm):
 
11
  # limitations under the License.
12
  #
13
 
14
+ import logging
15
  import re
16
  import json
17
  import os
18
  import pandas as pd
19
  from rag.nlp import rag_tokenizer
20
  from . import regions
 
21
 
22
 
23
  current_file_path = os.path.dirname(os.path.abspath(__file__))
 
71
  for c,v in CORP_TAG.items():
72
  cc = corpNorm(rmNoise(c), False)
73
  if not cc:
74
+ logging.debug(c)
75
  CORP_TAG = {corpNorm(rmNoise(c), False):v for c,v in CORP_TAG.items()}
76
 
77
  def is_good(nm):
deepdoc/parser/resume/step_two.py CHANGED
@@ -10,7 +10,7 @@
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
13
-
14
  import re
15
  import copy
16
  import time
@@ -23,7 +23,6 @@ from deepdoc.parser.resume.entities import degrees, schools, corporations
23
  from rag.nlp import rag_tokenizer, surname
24
  from xpinyin import Pinyin
25
  from contextlib import contextmanager
26
- from api.utils.log_utils import logger
27
 
28
 
29
  class TimeoutException(Exception): pass
@@ -164,7 +163,7 @@ def forEdu(cv):
164
  y, m, d = getYMD(edu_end_dt)
165
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
166
  except Exception as e:
167
- logger.exception("forEdu {} {} {}".format(e, edu_end_dt, cv.get("work_exp_flt")))
168
  if sch:
169
  cv["school_name_kwd"] = sch
170
  if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \
@@ -276,7 +275,7 @@ def forWork(cv):
276
  try:
277
  duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days)
278
  except Exception:
279
- logger.exception("forWork {} {}".format(n.get("start_time"), n.get("end_time")))
280
 
281
  if n.get("scale"):
282
  r = re.search(r"^([0-9]+)", str(n["scale"]))
@@ -333,7 +332,7 @@ def forWork(cv):
333
  y, m, d = getYMD(work_st_tm)
334
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
335
  except Exception as e:
336
- logger.exception("forWork {} {} {}".format(e, work_st_tm, cv.get("work_exp_flt")))
337
 
338
  cv["job_num_int"] = 0
339
  if duas:
@@ -464,7 +463,7 @@ def parse(cv):
464
  cv[f"{t}_kwd"] = nms
465
  cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms))
466
  except Exception:
467
- logger.exception("parse {} {}".format(str(traceback.format_exc()), cv[k]))
468
  cv[k] = []
469
 
470
  # tokenize fields
@@ -565,7 +564,7 @@ def parse(cv):
565
  cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
566
  cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
567
  except Exception as e:
568
- logger.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
569
  if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
570
 
571
  keys = list(cv.keys())
@@ -580,7 +579,7 @@ def parse(cv):
580
 
581
  cv["tob_resume_id"] = str(cv["tob_resume_id"])
582
  cv["id"] = cv["tob_resume_id"]
583
- logger.info("CCCCCCCCCCCCCCC")
584
 
585
  return dealWithInt64(cv)
586
 
 
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
13
+ import logging
14
  import re
15
  import copy
16
  import time
 
23
  from rag.nlp import rag_tokenizer, surname
24
  from xpinyin import Pinyin
25
  from contextlib import contextmanager
 
26
 
27
 
28
  class TimeoutException(Exception): pass
 
163
  y, m, d = getYMD(edu_end_dt)
164
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
165
  except Exception as e:
166
+ logging.exception("forEdu {} {} {}".format(e, edu_end_dt, cv.get("work_exp_flt")))
167
  if sch:
168
  cv["school_name_kwd"] = sch
169
  if (len(cv.get("degree_kwd", [])) >= 1 and "本科" in cv["degree_kwd"]) \
 
275
  try:
276
  duas.append((datetime.datetime.strptime(ed, "%Y-%m-%d") - datetime.datetime.strptime(st, "%Y-%m-%d")).days)
277
  except Exception:
278
+ logging.exception("forWork {} {}".format(n.get("start_time"), n.get("end_time")))
279
 
280
  if n.get("scale"):
281
  r = re.search(r"^([0-9]+)", str(n["scale"]))
 
332
  y, m, d = getYMD(work_st_tm)
333
  cv["work_exp_flt"] = min(int(str(datetime.date.today())[0:4]) - int(y), cv.get("work_exp_flt", 1000))
334
  except Exception as e:
335
+ logging.exception("forWork {} {} {}".format(e, work_st_tm, cv.get("work_exp_flt")))
336
 
337
  cv["job_num_int"] = 0
338
  if duas:
 
463
  cv[f"{t}_kwd"] = nms
464
  cv[f"{t}_tks"] = rag_tokenizer.tokenize(" ".join(nms))
465
  except Exception:
466
+ logging.exception("parse {} {}".format(str(traceback.format_exc()), cv[k]))
467
  cv[k] = []
468
 
469
  # tokenize fields
 
564
  cv["work_start_dt"] = "%s-%02d-%02d 00:00:00" % (y, int(m), int(d))
565
  cv["work_exp_flt"] = int(str(datetime.date.today())[0:4]) - int(y)
566
  except Exception as e:
567
+ logging.exception("parse {} ==> {}".format(e, cv.get("work_start_time")))
568
  if "work_exp_flt" not in cv and cv.get("work_experience", 0): cv["work_exp_flt"] = int(cv["work_experience"]) / 12.
569
 
570
  keys = list(cv.keys())
 
579
 
580
  cv["tob_resume_id"] = str(cv["tob_resume_id"])
581
  cv["id"] = cv["tob_resume_id"]
582
+ logging.debug("CCCCCCCCCCCCCCC")
583
 
584
  return dealWithInt64(cv)
585
 
deepdoc/vision/operators.py CHANGED
@@ -14,13 +14,13 @@
14
  # limitations under the License.
15
  #
16
 
 
17
  import sys
18
  import six
19
  import cv2
20
  import numpy as np
21
  import math
22
  from PIL import Image
23
- from api.utils.log_utils import logger
24
 
25
 
26
  class DecodeImage(object):
@@ -403,7 +403,7 @@ class DetResizeForTest(object):
403
  return None, (None, None)
404
  img = cv2.resize(img, (int(resize_w), int(resize_h)))
405
  except BaseException:
406
- logger.exception("{} {} {}".format(img.shape, resize_w, resize_h))
407
  sys.exit(0)
408
  ratio_h = resize_h / float(h)
409
  ratio_w = resize_w / float(w)
 
14
  # limitations under the License.
15
  #
16
 
17
+ import logging
18
  import sys
19
  import six
20
  import cv2
21
  import numpy as np
22
  import math
23
  from PIL import Image
 
24
 
25
 
26
  class DecodeImage(object):
 
403
  return None, (None, None)
404
  img = cv2.resize(img, (int(resize_w), int(resize_h)))
405
  except BaseException:
406
+ logging.exception("{} {} {}".format(img.shape, resize_w, resize_h))
407
  sys.exit(0)
408
  ratio_h = resize_h / float(h)
409
  ratio_w = resize_w / float(w)
deepdoc/vision/recognizer.py CHANGED
@@ -11,6 +11,7 @@
11
  # limitations under the License.
12
  #
13
 
 
14
  import os
15
  from copy import deepcopy
16
 
@@ -19,7 +20,6 @@ from huggingface_hub import snapshot_download
19
 
20
  from api.utils.file_utils import get_project_base_directory
21
  from .operators import *
22
- from api.utils.log_utils import logger
23
 
24
 
25
  class Recognizer(object):
@@ -440,7 +440,7 @@ class Recognizer(object):
440
  end_index = min((i + 1) * batch_size, len(imgs))
441
  batch_image_list = imgs[start_index:end_index]
442
  inputs = self.preprocess(batch_image_list)
443
- logger.info("preprocess")
444
  for ins in inputs:
445
  bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
446
  res.append(bb)
 
11
  # limitations under the License.
12
  #
13
 
14
+ import logging
15
  import os
16
  from copy import deepcopy
17
 
 
20
 
21
  from api.utils.file_utils import get_project_base_directory
22
  from .operators import *
 
23
 
24
 
25
  class Recognizer(object):
 
440
  end_index = min((i + 1) * batch_size, len(imgs))
441
  batch_image_list = imgs[start_index:end_index]
442
  inputs = self.preprocess(batch_image_list)
443
+ logging.debug("preprocess")
444
  for ins in inputs:
445
  bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
446
  res.append(bb)
deepdoc/vision/seeit.py CHANGED
@@ -11,10 +11,10 @@
11
  # limitations under the License.
12
  #
13
 
 
14
  import os
15
  import PIL
16
  from PIL import ImageDraw
17
- from api.utils.log_utils import logger
18
 
19
 
20
  def save_results(image_list, results, labels, output_dir='output/', threshold=0.5):
@@ -25,7 +25,7 @@ def save_results(image_list, results, labels, output_dir='output/', threshold=0.
25
 
26
  out_path = os.path.join(output_dir, f"{idx}.jpg")
27
  im.save(out_path, quality=95)
28
- logger.info("save result to: " + out_path)
29
 
30
 
31
  def draw_box(im, result, lables, threshold=0.5):
 
11
  # limitations under the License.
12
  #
13
 
14
+ import logging
15
  import os
16
  import PIL
17
  from PIL import ImageDraw
 
18
 
19
 
20
  def save_results(image_list, results, labels, output_dir='output/', threshold=0.5):
 
25
 
26
  out_path = os.path.join(output_dir, f"{idx}.jpg")
27
  im.save(out_path, quality=95)
28
+ logging.debug("save result to: " + out_path)
29
 
30
 
31
  def draw_box(im, result, lables, threshold=0.5):
deepdoc/vision/t_recognizer.py CHANGED
@@ -10,9 +10,9 @@
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
 
13
  import os
14
  import sys
15
- from api.utils.log_utils import logger
16
 
17
  sys.path.insert(
18
  0,
@@ -59,7 +59,7 @@ def main(args):
59
  } for t in lyt]
60
  img = draw_box(images[i], lyt, labels, float(args.threshold))
61
  img.save(outputs[i], quality=95)
62
- logger.info("save result to: " + outputs[i])
63
 
64
 
65
  def get_table_html(img, tb_cpns, ocr):
 
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
13
+ import logging
14
  import os
15
  import sys
 
16
 
17
  sys.path.insert(
18
  0,
 
59
  } for t in lyt]
60
  img = draw_box(images[i], lyt, labels, float(args.threshold))
61
  img.save(outputs[i], quality=95)
62
+ logging.info("save result to: " + outputs[i])
63
 
64
 
65
  def get_table_html(img, tb_cpns, ocr):
deepdoc/vision/table_structure_recognizer.py CHANGED
@@ -38,7 +38,7 @@ class TableStructureRecognizer(Recognizer):
38
  super().__init__(self.labels, "tsr", os.path.join(
39
  get_project_base_directory(),
40
  "rag/res/deepdoc"))
41
- except Exception as e:
42
  super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc",
43
  local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
44
  local_dir_use_symlinks=False))
 
38
  super().__init__(self.labels, "tsr", os.path.join(
39
  get_project_base_directory(),
40
  "rag/res/deepdoc"))
41
+ except Exception:
42
  super().__init__(self.labels, "tsr", snapshot_download(repo_id="InfiniFlow/deepdoc",
43
  local_dir=os.path.join(get_project_base_directory(), "rag/res/deepdoc"),
44
  local_dir_use_symlinks=False))
graphrag/claim_extractor.py CHANGED
@@ -5,6 +5,7 @@ Reference:
5
  - [graphrag](https://github.com/microsoft/graphrag)
6
  """
7
 
 
8
  import argparse
9
  import json
10
  import re
@@ -17,7 +18,6 @@ import tiktoken
17
  from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
18
  from rag.llm.chat_model import Base as CompletionLLM
19
  from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
20
- from api.utils.log_utils import logger
21
 
22
  DEFAULT_TUPLE_DELIMITER = "<|>"
23
  DEFAULT_RECORD_DELIMITER = "##"
@@ -126,7 +126,7 @@ class ClaimExtractor:
126
  ]
127
  source_doc_map[document_id] = text
128
  except Exception as e:
129
- logger.exception("error extracting claim")
130
  self._on_error(
131
  e,
132
  traceback.format_exc(),
@@ -265,4 +265,4 @@ if __name__ == "__main__":
265
  "claim_description": ""
266
  }
267
  claim = ex(info)
268
- logger.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
 
5
  - [graphrag](https://github.com/microsoft/graphrag)
6
  """
7
 
8
+ import logging
9
  import argparse
10
  import json
11
  import re
 
18
  from graphrag.claim_prompt import CLAIM_EXTRACTION_PROMPT, CONTINUE_PROMPT, LOOP_PROMPT
19
  from rag.llm.chat_model import Base as CompletionLLM
20
  from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
 
21
 
22
  DEFAULT_TUPLE_DELIMITER = "<|>"
23
  DEFAULT_RECORD_DELIMITER = "##"
 
126
  ]
127
  source_doc_map[document_id] = text
128
  except Exception as e:
129
+ logging.exception("error extracting claim")
130
  self._on_error(
131
  e,
132
  traceback.format_exc(),
 
265
  "claim_description": ""
266
  }
267
  claim = ex(info)
268
+ logging.info(json.dumps(claim.output, ensure_ascii=False, indent=2))
graphrag/community_reports_extractor.py CHANGED
@@ -5,6 +5,7 @@ Reference:
5
  - [graphrag](https://github.com/microsoft/graphrag)
6
  """
7
 
 
8
  import json
9
  import re
10
  import traceback
@@ -19,7 +20,6 @@ from rag.llm.chat_model import Base as CompletionLLM
19
  from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
20
  from rag.utils import num_tokens_from_string
21
  from timeit import default_timer as timer
22
- from api.utils.log_utils import logger
23
 
24
 
25
  @dataclass
@@ -80,7 +80,7 @@ class CommunityReportsExtractor:
80
  response = re.sub(r"[^\}]*$", "", response)
81
  response = re.sub(r"\{\{", "{", response)
82
  response = re.sub(r"\}\}", "}", response)
83
- logger.info(response)
84
  response = json.loads(response)
85
  if not dict_has_keys_with_types(response, [
86
  ("title", str),
@@ -92,7 +92,7 @@ class CommunityReportsExtractor:
92
  response["weight"] = weight
93
  response["entities"] = ents
94
  except Exception as e:
95
- logger.exception("CommunityReportsExtractor got exception")
96
  self._on_error(e, traceback.format_exc(), None)
97
  continue
98
 
 
5
  - [graphrag](https://github.com/microsoft/graphrag)
6
  """
7
 
8
+ import logging
9
  import json
10
  import re
11
  import traceback
 
20
  from graphrag.utils import ErrorHandlerFn, perform_variable_replacements, dict_has_keys_with_types
21
  from rag.utils import num_tokens_from_string
22
  from timeit import default_timer as timer
 
23
 
24
 
25
  @dataclass
 
80
  response = re.sub(r"[^\}]*$", "", response)
81
  response = re.sub(r"\{\{", "{", response)
82
  response = re.sub(r"\}\}", "}", response)
83
+ logging.debug(response)
84
  response = json.loads(response)
85
  if not dict_has_keys_with_types(response, [
86
  ("title", str),
 
92
  response["weight"] = weight
93
  response["entities"] = ents
94
  except Exception as e:
95
+ logging.exception("CommunityReportsExtractor got exception")
96
  self._on_error(e, traceback.format_exc(), None)
97
  continue
98
 
graphrag/description_summary.py CHANGED
@@ -5,19 +5,11 @@ Reference:
5
  - [graphrag](https://github.com/microsoft/graphrag)
6
  """
7
 
8
- import argparse
9
- import html
10
  import json
11
- import logging
12
- import numbers
13
- import re
14
- import traceback
15
- from collections.abc import Callable
16
  from dataclasses import dataclass
17
 
18
  from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
19
  from rag.llm.chat_model import Base as CompletionLLM
20
- import networkx as nx
21
 
22
  from rag.utils import num_tokens_from_string
23
 
 
5
  - [graphrag](https://github.com/microsoft/graphrag)
6
  """
7
 
 
 
8
  import json
 
 
 
 
 
9
  from dataclasses import dataclass
10
 
11
  from graphrag.utils import ErrorHandlerFn, perform_variable_replacements
12
  from rag.llm.chat_model import Base as CompletionLLM
 
13
 
14
  from rag.utils import num_tokens_from_string
15
 
graphrag/entity_resolution.py CHANGED
@@ -13,8 +13,8 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
- import itertools
17
  import logging
 
18
  import re
19
  import traceback
20
  from dataclasses import dataclass
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import logging
17
+ import itertools
18
  import re
19
  import traceback
20
  from dataclasses import dataclass
graphrag/index.py CHANGED
@@ -13,6 +13,7 @@
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
 
16
  import os
17
  from concurrent.futures import ThreadPoolExecutor
18
  import json
@@ -28,7 +29,6 @@ from graphrag.graph_extractor import GraphExtractor, DEFAULT_ENTITY_TYPES
28
  from graphrag.mind_map_extractor import MindMapExtractor
29
  from rag.nlp import rag_tokenizer
30
  from rag.utils import num_tokens_from_string
31
- from api.utils.log_utils import logger
32
 
33
 
34
  def graph_merge(g1, g2):
@@ -95,7 +95,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
95
  chunks = []
96
  for n, attr in graph.nodes(data=True):
97
  if attr.get("rank", 0) == 0:
98
- logger.info(f"Ignore entity: {n}")
99
  continue
100
  chunk = {
101
  "name_kwd": n,
@@ -137,7 +137,7 @@ def build_knowledge_graph_chunks(tenant_id: str, chunks: List[str], callback, en
137
  mg = mindmap(_chunks).output
138
  if not len(mg.keys()): return chunks
139
 
140
- logger.info(json.dumps(mg, ensure_ascii=False, indent=2))
141
  chunks.append(
142
  {
143
  "content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),
 
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
15
  #
16
+ import logging
17
  import os
18
  from concurrent.futures import ThreadPoolExecutor
19
  import json
 
29
  from graphrag.mind_map_extractor import MindMapExtractor
30
  from rag.nlp import rag_tokenizer
31
  from rag.utils import num_tokens_from_string
 
32
 
33
 
34
  def graph_merge(g1, g2):
 
95
  chunks = []
96
  for n, attr in graph.nodes(data=True):
97
  if attr.get("rank", 0) == 0:
98
+ logging.debug(f"Ignore entity: {n}")
99
  continue
100
  chunk = {
101
  "name_kwd": n,
 
137
  mg = mindmap(_chunks).output
138
  if not len(mg.keys()): return chunks
139
 
140
+ logging.debug(json.dumps(mg, ensure_ascii=False, indent=2))
141
  chunks.append(
142
  {
143
  "content_with_weight": json.dumps(mg, ensure_ascii=False, indent=2),
graphrag/leiden.py CHANGED
@@ -14,8 +14,6 @@ from graspologic.utils import largest_connected_component
14
  import networkx as nx
15
  from networkx import is_empty
16
 
17
- log = logging.getLogger(__name__)
18
-
19
 
20
  def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
21
  """Ensure an undirected graph with the same relationships will always be read the same way."""
@@ -99,7 +97,7 @@ def run(graph: nx.Graph, args: dict[str, Any]) -> dict[int, dict[str, dict]]:
99
  max_cluster_size = args.get("max_cluster_size", 12)
100
  use_lcc = args.get("use_lcc", True)
101
  if args.get("verbose", False):
102
- log.info(
103
  "Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
104
  )
105
  if not graph.nodes(): return {}
 
14
  import networkx as nx
15
  from networkx import is_empty
16
 
 
 
17
 
18
  def _stabilize_graph(graph: nx.Graph) -> nx.Graph:
19
  """Ensure an undirected graph with the same relationships will always be read the same way."""
 
97
  max_cluster_size = args.get("max_cluster_size", 12)
98
  use_lcc = args.get("use_lcc", True)
99
  if args.get("verbose", False):
100
+ logging.debug(
101
  "Running leiden with max_cluster_size=%s, lcc=%s", max_cluster_size, use_lcc
102
  )
103
  if not graph.nodes(): return {}
graphrag/mind_map_extractor.py CHANGED
@@ -14,8 +14,8 @@
14
  # limitations under the License.
15
  #
16
 
17
- import collections
18
  import logging
 
19
  import os
20
  import re
21
  import traceback
@@ -29,7 +29,6 @@ from rag.llm.chat_model import Base as CompletionLLM
29
  import markdown_to_json
30
  from functools import reduce
31
  from rag.utils import num_tokens_from_string
32
- from api.utils.log_utils import logger
33
 
34
 
35
  @dataclass
@@ -193,6 +192,6 @@ class MindMapExtractor:
193
  gen_conf = {"temperature": 0.5}
194
  response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
195
  response = re.sub(r"```[^\n]*", "", response)
196
- logger.info(response)
197
- logger.info(self._todict(markdown_to_json.dictify(response)))
198
  return self._todict(markdown_to_json.dictify(response))
 
14
  # limitations under the License.
15
  #
16
 
 
17
  import logging
18
+ import collections
19
  import os
20
  import re
21
  import traceback
 
29
  import markdown_to_json
30
  from functools import reduce
31
  from rag.utils import num_tokens_from_string
 
32
 
33
 
34
  @dataclass
 
192
  gen_conf = {"temperature": 0.5}
193
  response = self._llm.chat(text, [{"role": "user", "content": "Output:"}], gen_conf)
194
  response = re.sub(r"```[^\n]*", "", response)
195
+ logging.debug(response)
196
+ logging.debug(self._todict(markdown_to_json.dictify(response)))
197
  return self._todict(markdown_to_json.dictify(response))
intergrations/chatgpt-on-wechat/plugins/ragflow_chat.py CHANGED
@@ -1,8 +1,8 @@
 
1
  import requests
2
  from bridge.context import ContextType # Import Context, ContextType
3
  from bridge.reply import Reply, ReplyType # Import Reply, ReplyType
4
  from bridge import *
5
- from api.utils.log_utils import logger
6
  from plugins import Plugin, register # Import Plugin and register
7
  from plugins.event import Event, EventContext, EventAction # Import event-related classes
8
 
@@ -16,7 +16,7 @@ class RAGFlowChat(Plugin):
16
  self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
17
  # Store conversation_id for each user
18
  self.conversations = {}
19
- logger.info("[RAGFlowChat] Plugin initialized")
20
 
21
  def on_handle_context(self, e_context: EventContext):
22
  context = e_context['context']
@@ -45,7 +45,7 @@ class RAGFlowChat(Plugin):
45
  user_id = session_id # Use session_id as user_id
46
 
47
  if not api_key or not host_address:
48
- logger.error("[RAGFlowChat] Missing configuration")
49
  return "The plugin configuration is incomplete. Please check the configuration."
50
 
51
  headers = {
@@ -63,20 +63,20 @@ class RAGFlowChat(Plugin):
63
  }
64
  try:
65
  response = requests.get(url_new_conversation, headers=headers, params=params_new_conversation)
66
- logger.debug(f"[RAGFlowChat] New conversation response: {response.text}")
67
  if response.status_code == 200:
68
  data = response.json()
69
  if data.get("code") == 0:
70
  conversation_id = data["data"]["id"]
71
  self.conversations[user_id] = conversation_id
72
  else:
73
- logger.error(f"[RAGFlowChat] Failed to create conversation: {data.get('message')}")
74
  return f"Sorry, unable to create a conversation: {data.get('message')}"
75
  else:
76
- logger.error(f"[RAGFlowChat] HTTP error when creating conversation: {response.status_code}")
77
  return f"Sorry, unable to connect to RAGFlow API (create conversation). HTTP status code: {response.status_code}"
78
  except Exception as e:
79
- logger.exception("[RAGFlowChat] Exception when creating conversation")
80
  return f"Sorry, an internal error occurred: {str(e)}"
81
 
82
  # Step 2: Send the message and get a reply
@@ -95,18 +95,18 @@ class RAGFlowChat(Plugin):
95
 
96
  try:
97
  response = requests.post(url_completion, headers=headers, json=payload_completion)
98
- logger.debug(f"[RAGFlowChat] Completion response: {response.text}")
99
  if response.status_code == 200:
100
  data = response.json()
101
  if data.get("code") == 0:
102
  answer = data["data"]["answer"]
103
  return answer
104
  else:
105
- logger.error(f"[RAGFlowChat] Failed to get answer: {data.get('message')}")
106
  return f"Sorry, unable to get a reply: {data.get('message')}"
107
  else:
108
- logger.error(f"[RAGFlowChat] HTTP error when getting answer: {response.status_code}")
109
  return f"Sorry, unable to connect to RAGFlow API (get reply). HTTP status code: {response.status_code}"
110
  except Exception as e:
111
- logger.exception("[RAGFlowChat] Exception when getting answer")
112
  return f"Sorry, an internal error occurred: {str(e)}"
 
1
+ import logging
2
  import requests
3
  from bridge.context import ContextType # Import Context, ContextType
4
  from bridge.reply import Reply, ReplyType # Import Reply, ReplyType
5
  from bridge import *
 
6
  from plugins import Plugin, register # Import Plugin and register
7
  from plugins.event import Event, EventContext, EventAction # Import event-related classes
8
 
 
16
  self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
17
  # Store conversation_id for each user
18
  self.conversations = {}
19
+ logging.info("[RAGFlowChat] Plugin initialized")
20
 
21
  def on_handle_context(self, e_context: EventContext):
22
  context = e_context['context']
 
45
  user_id = session_id # Use session_id as user_id
46
 
47
  if not api_key or not host_address:
48
+ logging.error("[RAGFlowChat] Missing configuration")
49
  return "The plugin configuration is incomplete. Please check the configuration."
50
 
51
  headers = {
 
63
  }
64
  try:
65
  response = requests.get(url_new_conversation, headers=headers, params=params_new_conversation)
66
+ logging.debug(f"[RAGFlowChat] New conversation response: {response.text}")
67
  if response.status_code == 200:
68
  data = response.json()
69
  if data.get("code") == 0:
70
  conversation_id = data["data"]["id"]
71
  self.conversations[user_id] = conversation_id
72
  else:
73
+ logging.error(f"[RAGFlowChat] Failed to create conversation: {data.get('message')}")
74
  return f"Sorry, unable to create a conversation: {data.get('message')}"
75
  else:
76
+ logging.error(f"[RAGFlowChat] HTTP error when creating conversation: {response.status_code}")
77
  return f"Sorry, unable to connect to RAGFlow API (create conversation). HTTP status code: {response.status_code}"
78
  except Exception as e:
79
+ logging.exception("[RAGFlowChat] Exception when creating conversation")
80
  return f"Sorry, an internal error occurred: {str(e)}"
81
 
82
  # Step 2: Send the message and get a reply
 
95
 
96
  try:
97
  response = requests.post(url_completion, headers=headers, json=payload_completion)
98
+ logging.debug(f"[RAGFlowChat] Completion response: {response.text}")
99
  if response.status_code == 200:
100
  data = response.json()
101
  if data.get("code") == 0:
102
  answer = data["data"]["answer"]
103
  return answer
104
  else:
105
+ logging.error(f"[RAGFlowChat] Failed to get answer: {data.get('message')}")
106
  return f"Sorry, unable to get a reply: {data.get('message')}"
107
  else:
108
+ logging.error(f"[RAGFlowChat] HTTP error when getting answer: {response.status_code}")
109
  return f"Sorry, unable to connect to RAGFlow API (get reply). HTTP status code: {response.status_code}"
110
  except Exception as e:
111
+ logging.exception("[RAGFlowChat] Exception when getting answer")
112
  return f"Sorry, an internal error occurred: {str(e)}"
rag/app/book.py CHANGED
@@ -10,6 +10,7 @@
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
 
13
  from tika import parser
14
  import re
15
  from io import BytesIO
@@ -20,7 +21,6 @@ from rag.nlp import bullets_category, is_english,remove_contents_table, \
20
  tokenize_chunks
21
  from rag.nlp import rag_tokenizer
22
  from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
23
- from api.utils.log_utils import logger
24
 
25
 
26
  class Pdf(PdfParser):
@@ -39,7 +39,7 @@ class Pdf(PdfParser):
39
  start = timer()
40
  self._layouts_rec(zoomin)
41
  callback(0.67, "Layout analysis finished")
42
- logger.info("layouts: {}".format(timer() - start))
43
  self._table_transformer_job(zoomin)
44
  callback(0.68, "Table analysis finished")
45
  self._text_merge()
 
10
  # See the License for the specific language governing permissions and
11
  # limitations under the License.
12
  #
13
+ import logging
14
  from tika import parser
15
  import re
16
  from io import BytesIO
 
21
  tokenize_chunks
22
  from rag.nlp import rag_tokenizer
23
  from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
 
24
 
25
 
26
  class Pdf(PdfParser):
 
39
  start = timer()
40
  self._layouts_rec(zoomin)
41
  callback(0.67, "Layout analysis finished")
42
+ logging.debug("layouts: {}".format(timer() - start))
43
  self._table_transformer_job(zoomin)
44
  callback(0.68, "Table analysis finished")
45
  self._text_merge()
rag/app/email.py CHANGED
@@ -11,6 +11,7 @@
11
  # limitations under the License.
12
  #
13
 
 
14
  from email import policy
15
  from email.parser import BytesParser
16
  from rag.app.naive import chunk as naive_chunk
@@ -18,7 +19,6 @@ import re
18
  from rag.nlp import rag_tokenizer, naive_merge, tokenize_chunks
19
  from deepdoc.parser import HtmlParser, TxtParser
20
  from timeit import default_timer as timer
21
- from api.utils.log_utils import logger
22
  import io
23
 
24
 
@@ -86,7 +86,7 @@ def chunk(
86
  )
87
 
88
  main_res.extend(tokenize_chunks(chunks, doc, eng, None))
89
- logger.info("naive_merge({}): {}".format(filename, timer() - st))
90
  # get the attachment info
91
  for part in msg.iter_attachments():
92
  content_disposition = part.get("Content-Disposition")
 
11
  # limitations under the License.
12
  #
13
 
14
+ import logging
15
  from email import policy
16
  from email.parser import BytesParser
17
  from rag.app.naive import chunk as naive_chunk
 
19
  from rag.nlp import rag_tokenizer, naive_merge, tokenize_chunks
20
  from deepdoc.parser import HtmlParser, TxtParser
21
  from timeit import default_timer as timer
 
22
  import io
23
 
24
 
 
86
  )
87
 
88
  main_res.extend(tokenize_chunks(chunks, doc, eng, None))
89
+ logging.debug("naive_merge({}): {}".format(filename, timer() - st))
90
  # get the attachment info
91
  for part in msg.iter_attachments():
92
  content_disposition = part.get("Content-Disposition")
rag/app/laws.py CHANGED
@@ -21,7 +21,6 @@ from rag.nlp import bullets_category, remove_contents_table, hierarchical_merge,
21
  make_colon_as_title, tokenize_chunks, docx_question_level
22
  from rag.nlp import rag_tokenizer
23
  from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
24
- from api.utils.log_utils import logger
25
 
26
 
27
  class Docx(DocxParser):
@@ -122,7 +121,7 @@ class Pdf(PdfParser):
122
  start = timer()
123
  self._layouts_rec(zoomin)
124
  callback(0.67, "Layout analysis finished")
125
- logger.info("layouts:".format(
126
  ))
127
  self._naive_vertical_merge()
128
 
 
21
  make_colon_as_title, tokenize_chunks, docx_question_level
22
  from rag.nlp import rag_tokenizer
23
  from deepdoc.parser import PdfParser, DocxParser, PlainParser, HtmlParser
 
24
 
25
 
26
  class Docx(DocxParser):
 
121
  start = timer()
122
  self._layouts_rec(zoomin)
123
  callback(0.67, "Layout analysis finished")
124
+ logging.debug("layouts:".format(
125
  ))
126
  self._naive_vertical_merge()
127