liuhua liuhua committed on
Commit
f05a941
·
1 Parent(s): e64a1ed

Update exesql component for agent (#4307)

Browse files

### What problem does this PR solve?

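Update the ExeSQL component for the agent: ExeSQL now derives from Generate, so a SQL statement that fails to execute is sent back to the LLM together with the database error message, and the repaired statement is retried until the configured loop limit is reached.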

### Type of change

- [x] Refactoring

---------

Co-authored-by: liuhua <[email protected]>

Files changed (1) hide show
  1. agent/component/exesql.py +55 -33
agent/component/exesql.py CHANGED
@@ -18,11 +18,11 @@ import re
18
  import pandas as pd
19
  import pymysql
20
  import psycopg2
21
- from agent.component.base import ComponentBase, ComponentParamBase
22
  import pyodbc
23
  import logging
24
 
25
- class ExeSQLParam(ComponentParamBase):
26
  """
27
  Define the ExeSQL component parameters.
28
  """
@@ -39,6 +39,7 @@ class ExeSQLParam(ComponentParamBase):
39
  self.top_n = 30
40
 
41
  def check(self):
 
42
  self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgresql', 'mariadb', 'mssql'])
43
  self.check_empty(self.database, "Database name")
44
  self.check_empty(self.username, "database username")
@@ -53,25 +54,14 @@ class ExeSQLParam(ComponentParamBase):
53
  raise ValueError("The host is not accessible.")
54
 
55
 
56
- class ExeSQL(ComponentBase, ABC):
57
  component_name = "ExeSQL"
58
 
59
- def _run(self, history, **kwargs):
60
- if not hasattr(self, "_loop"):
61
- setattr(self, "_loop", 0)
62
- if self._loop >= self._param.loop:
63
- self._loop = 0
64
- raise Exception("Maximum loop time exceeds. Can't query the correct data via SQL statement.")
65
- self._loop += 1
66
-
67
- ans = self.get_input()
68
- ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
69
-
70
- # improve the information extraction, most llm return results in markdown format ```sql query ```
71
  match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
72
  if match:
73
  ans = match.group(1) # Query content
74
- print(ans)
75
  else:
76
  print("no markdown")
77
  ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
@@ -79,7 +69,12 @@ class ExeSQL(ComponentBase, ABC):
79
  ans = re.sub(r';[^;]*$', r';', ans)
80
  if not ans:
81
  raise Exception("SQL statement not found!")
 
82
 
 
 
 
 
83
  logging.info("db_type: ",self._param.db_type)
84
  if self._param.db_type in ["mysql", "mariadb"]:
85
  db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
@@ -100,25 +95,52 @@ class ExeSQL(ComponentBase, ABC):
100
  cursor = db.cursor()
101
  except Exception as e:
102
  raise Exception("Database Connection Failed! \n" + str(e))
 
 
 
 
103
  sql_res = []
104
- for single_sql in re.split(r';', ans.replace(r"\n", " ")):
105
- if not single_sql:
106
- continue
107
- try:
108
- logging.info("single_sql: ",single_sql)
109
- cursor.execute(single_sql)
110
- if cursor.rowcount == 0:
111
- sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
112
- continue
113
- single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)])
114
- single_res.columns = [i[0] for i in cursor.description]
115
- sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
116
- except Exception as e:
117
- sql_res.append({"content": "**Error**:" + str(e) + "\nError SQL Statement:" + single_sql})
118
- pass
 
 
 
 
 
 
119
  db.close()
120
-
121
  if not sql_res:
122
  return ExeSQL.be_output("")
123
-
124
  return pd.DataFrame(sql_res)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  import pandas as pd
19
  import pymysql
20
  import psycopg2
21
+ from agent.component import GenerateParam, Generate
22
  import pyodbc
23
  import logging
24
 
25
+ class ExeSQLParam(GenerateParam):
26
  """
27
  Define the ExeSQL component parameters.
28
  """
 
39
  self.top_n = 30
40
 
41
  def check(self):
42
+ super().check()
43
  self.check_valid_value(self.db_type, "Choose DB type", ['mysql', 'postgresql', 'mariadb', 'mssql'])
44
  self.check_empty(self.database, "Database name")
45
  self.check_empty(self.username, "database username")
 
54
  raise ValueError("The host is not accessible.")
55
 
56
 
57
+ class ExeSQL(Generate, ABC):
58
  component_name = "ExeSQL"
59
 
60
+ def _refactor(self,ans):
 
 
 
 
 
 
 
 
 
 
 
61
  match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
62
  if match:
63
  ans = match.group(1) # Query content
64
+ return ans
65
  else:
66
  print("no markdown")
67
  ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
 
69
  ans = re.sub(r';[^;]*$', r';', ans)
70
  if not ans:
71
  raise Exception("SQL statement not found!")
72
+ return ans
73
 
74
+ def _run(self, history, **kwargs):
75
+ ans = self.get_input()
76
+ ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
77
+ ans = self._refactor(ans)
78
  logging.info("db_type: ",self._param.db_type)
79
  if self._param.db_type in ["mysql", "mariadb"]:
80
  db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
 
95
  cursor = db.cursor()
96
  except Exception as e:
97
  raise Exception("Database Connection Failed! \n" + str(e))
98
+ if not hasattr(self, "_loop"):
99
+ setattr(self, "_loop", 0)
100
+ self._loop += 1
101
+ input_list=re.split(r';', ans.replace(r"\n", " "))
102
  sql_res = []
103
+ for i in range(len(input_list)):
104
+ single_sql=input_list[i]
105
+ while self._loop <= self._param.loop:
106
+ self._loop+=1
107
+ if not single_sql:
108
+ break
109
+ try:
110
+ logging.info("single_sql: ", single_sql)
111
+ cursor.execute(single_sql)
112
+ if cursor.rowcount == 0:
113
+ sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
114
+ break
115
+ single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)])
116
+ single_res.columns = [i[0] for i in cursor.description]
117
+ sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
118
+ break
119
+ except Exception as e:
120
+ single_sql = self._regenerate_sql(single_sql, str(e), **kwargs)
121
+ single_sql = self._refactor(single_sql)
122
+ if self._loop > self._param.loop:
123
+ raise Exception("Maximum loop time exceeds. Can't query the correct data via SQL statement.")
124
  db.close()
 
125
  if not sql_res:
126
  return ExeSQL.be_output("")
 
127
  return pd.DataFrame(sql_res)
128
+
129
def _regenerate_sql(self, failed_sql, error_message, **kwargs):
    """Ask the LLM to repair a SQL statement that failed to execute.

    Builds a repair prompt from the failing statement and the error text,
    stores it in ``self._param.prompt`` and runs ``Generate._run`` with an
    empty history.

    Args:
        failed_sql: The SQL statement that raised an error.
        error_message: Text of the error reported for that statement.
        **kwargs: Forwarded unchanged to ``Generate._run``.

    Returns:
        The ``content`` value of the first row of the generation result.
        If that value cannot be read, the failure is logged and
        ``failed_sql`` is returned unchanged, so the caller always gets a
        statement it can pass on to ``_refactor`` and retry.
    """
    prompt = f'''
    ## You are the Repair SQL Statement Helper, please modify the original SQL statement based on the SQL query error report.
    ## The original SQL statement is as follows:{failed_sql}.
    ## The contents of the SQL query error report is as follows:{error_message}.
    ## Answer only the modified SQL statement. Please do not give any explanation, just answer the code.
    '''
    # NOTE(review): this overwrites the component's prompt parameter and does
    # not restore it afterwards; confirm nothing else relies on the old value.
    self._param.prompt = prompt
    response = Generate._run(self, [], **kwargs)
    try:
        return response.loc[0, "content"]
    except Exception as e:
        # The shape of the generation result is defined outside this block,
        # so any failure to read it is treated the same way: log it and fall
        # back to the original statement instead of returning None, which
        # would make the caller's regex-based clean-up raise a TypeError.
        logging.error("Failed to regenerate SQL: %s", e)
        return failed_sql
144
+
145
def debug(self, **kwargs):
    """Run the component once with an empty history.

    Args:
        **kwargs: Forwarded unchanged to ``_run``.

    Returns:
        Whatever ``_run`` returns for an empty history.
    """
    empty_history = []
    return self._run(empty_history, **kwargs)