isthaison commited on
Commit
74057de
·
1 Parent(s): 5d7b16c

extraction sqlquery (#4027)

Browse files

clone https://github.com/infiniflow/ragflow/pull/4023
improve the information extraction, most llm return results in markdown
format ````sql ___ query `____ ```

Files changed (1) hide show
  1. agent/component/exesql.py +17 -3
agent/component/exesql.py CHANGED
@@ -20,7 +20,7 @@ import pymysql
20
  import psycopg2
21
  from agent.component.base import ComponentBase, ComponentParamBase
22
  import pyodbc
23
-
24
 
25
  class ExeSQLParam(ComponentParamBase):
26
  """
@@ -65,13 +65,26 @@ class ExeSQL(ComponentBase, ABC):
65
  self._loop += 1
66
 
67
  ans = self.get_input()
 
 
68
  ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
69
- ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE)
 
 
 
 
 
 
 
 
 
 
70
  ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
71
  ans = re.sub(r';[^;]*$', r';', ans)
72
  if not ans:
73
  raise Exception("SQL statement not found!")
74
 
 
75
  if self._param.db_type in ["mysql", "mariadb"]:
76
  db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
77
  port=self._param.port, password=self._param.password)
@@ -96,11 +109,12 @@ class ExeSQL(ComponentBase, ABC):
96
  if not single_sql:
97
  continue
98
  try:
 
99
  cursor.execute(single_sql)
100
  if cursor.rowcount == 0:
101
  sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
102
  continue
103
- single_res = pd.DataFrame([i for i in cursor.fetchmany(size=self._param.top_n)])
104
  single_res.columns = [i[0] for i in cursor.description]
105
  sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
106
  except Exception as e:
 
20
  import psycopg2
21
  from agent.component.base import ComponentBase, ComponentParamBase
22
  import pyodbc
23
+ import logging
24
 
25
  class ExeSQLParam(ComponentParamBase):
26
  """
 
65
  self._loop += 1
66
 
67
  ans = self.get_input()
68
+
69
+
70
  ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
71
+ if self._param.db_type == 'mssql':
72
+ # improve the information extraction, most llm return results in markdown format ```sql query ```
73
+ match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
74
+ if match:
75
+ ans = match.group(1) # Query content
76
+ print(ans)
77
+ else:
78
+ print("no markdown")
79
+ ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
80
+ else:
81
+ ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE)
82
  ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
83
  ans = re.sub(r';[^;]*$', r';', ans)
84
  if not ans:
85
  raise Exception("SQL statement not found!")
86
 
87
+ logging.info("db_type: ",self._param.db_type)
88
  if self._param.db_type in ["mysql", "mariadb"]:
89
  db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
90
  port=self._param.port, password=self._param.password)
 
109
  if not single_sql:
110
  continue
111
  try:
112
+ logging.info("single_sql: ",single_sql)
113
  cursor.execute(single_sql)
114
  if cursor.rowcount == 0:
115
  sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
116
  continue
117
+ single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)])
118
  single_res.columns = [i[0] for i in cursor.description]
119
  sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
120
  except Exception as e: