Kenny Dizi committed on
Commit
f74a214
·
1 Parent(s): cae0eab

Improve storage engine (#4341)

Browse files

### What problem does this PR solve?

- Bring `STORAGE_IMPL` back in `rag/svr/cache_file_svr.py`
- Simplify storage connection when working with AWS S3

### Type of change

- [x] Refactoring

conf/service_conf.yaml CHANGED
@@ -22,7 +22,7 @@ infinity:
22
  db_name: 'default_db'
23
  redis:
24
  db: 1
25
- password: 'infini_rag_flow'
26
  host: 'redis:6379'
27
 
28
  # postgres:
@@ -34,7 +34,6 @@ redis:
34
  # max_connections: 100
35
  # stale_timeout: 30
36
  # s3:
37
- # endpoint: 'endpoint'
38
  # access_key: 'access_key'
39
  # secret_key: 'secret_key'
40
  # region: 'region'
 
22
  db_name: 'default_db'
23
  redis:
24
  db: 1
25
+ password: 'infini_rag_flow'
26
  host: 'redis:6379'
27
 
28
  # postgres:
 
34
  # max_connections: 100
35
  # stale_timeout: 30
36
  # s3:
 
37
  # access_key: 'access_key'
38
  # secret_key: 'secret_key'
39
  # region: 'region'
docker/service_conf.yaml.template CHANGED
@@ -22,7 +22,7 @@ infinity:
22
  db_name: 'default_db'
23
  redis:
24
  db: 1
25
- password: '${REDIS_PASSWORD:-infini_rag_flow}'
26
  host: '${REDIS_HOST:-redis}:6379'
27
 
28
  # postgres:
@@ -34,7 +34,6 @@ redis:
34
  # max_connections: 100
35
  # stale_timeout: 30
36
  # s3:
37
- # endpoint: 'endpoint'
38
  # access_key: 'access_key'
39
  # secret_key: 'secret_key'
40
  # region: 'region'
 
22
  db_name: 'default_db'
23
  redis:
24
  db: 1
25
+ password: '${REDIS_PASSWORD:-infini_rag_flow}'
26
  host: '${REDIS_HOST:-redis}:6379'
27
 
28
  # postgres:
 
34
  # max_connections: 100
35
  # stale_timeout: 30
36
  # s3:
 
37
  # access_key: 'access_key'
38
  # secret_key: 'secret_key'
39
  # region: 'region'
rag/svr/cache_file_svr.py CHANGED
@@ -19,7 +19,7 @@ import traceback
19
 
20
  from api.db.db_models import close_connection
21
  from api.db.services.task_service import TaskService
22
- from rag.utils.minio_conn import MINIOs
23
  from rag.utils.redis_conn import REDIS_CONN
24
 
25
 
@@ -44,7 +44,7 @@ def main():
44
  key = "{}/{}".format(kb_id, loc)
45
  if REDIS_CONN.exist(key):
46
  continue
47
- file_bin = MINIOs.get(kb_id, loc)
48
  REDIS_CONN.transaction(key, file_bin, 12 * 60)
49
  logging.info("CACHE: {}".format(loc))
50
  except Exception as e:
 
19
 
20
  from api.db.db_models import close_connection
21
  from api.db.services.task_service import TaskService
22
+ from rag.utils.storage_factory import STORAGE_IMPL
23
  from rag.utils.redis_conn import REDIS_CONN
24
 
25
 
 
44
  key = "{}/{}".format(kb_id, loc)
45
  if REDIS_CONN.exist(key):
46
  continue
47
+ file_bin = STORAGE_IMPL.get(kb_id, loc)
48
  REDIS_CONN.transaction(key, file_bin, 12 * 60)
49
  logging.info("CACHE: {}".format(loc))
50
  except Exception as e:
rag/utils/s3_conn.py CHANGED
@@ -1,7 +1,6 @@
1
  import logging
2
  import boto3
3
  from botocore.exceptions import ClientError
4
- from botocore.client import Config
5
  import time
6
  from io import BytesIO
7
  from rag.utils import singleton
@@ -12,7 +11,6 @@ class RAGFlowS3(object):
12
  def __init__(self):
13
  self.conn = None
14
  self.s3_config = settings.S3
15
- self.endpoint = self.s3_config.get('endpoint', None)
16
  self.access_key = self.s3_config.get('access_key', None)
17
  self.secret_key = self.s3_config.get('secret_key', None)
18
  self.region = self.s3_config.get('region', None)
@@ -26,24 +24,14 @@ class RAGFlowS3(object):
26
  pass
27
 
28
  try:
29
-
30
- config = Config(
31
- s3={
32
- 'addressing_style': 'virtual'
33
- }
34
- )
35
-
36
  self.conn = boto3.client(
37
  's3',
38
- endpoint_url=self.endpoint,
39
  region_name=self.region,
40
  aws_access_key_id=self.access_key,
41
- aws_secret_access_key=self.secret_key,
42
- config=config
43
  )
44
  except Exception:
45
- logging.exception(
46
- "Fail to connect %s" % self.endpoint)
47
 
48
  def __close__(self):
49
  del self.conn
 
1
  import logging
2
  import boto3
3
  from botocore.exceptions import ClientError
 
4
  import time
5
  from io import BytesIO
6
  from rag.utils import singleton
 
11
  def __init__(self):
12
  self.conn = None
13
  self.s3_config = settings.S3
 
14
  self.access_key = self.s3_config.get('access_key', None)
15
  self.secret_key = self.s3_config.get('secret_key', None)
16
  self.region = self.s3_config.get('region', None)
 
24
  pass
25
 
26
  try:
 
 
 
 
 
 
 
27
  self.conn = boto3.client(
28
  's3',
 
29
  region_name=self.region,
30
  aws_access_key_id=self.access_key,
31
+ aws_secret_access_key=self.secret_key
 
32
  )
33
  except Exception:
34
+ logging.exception(f"Fail to connect at region {self.region}")
 
35
 
36
  def __close__(self):
37
  del self.conn