Eurosmart committed on
Commit
f4af40d
·
1 Parent(s): 8cec57c
.Dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# NOTE(review): Docker only picks this file up when it is named `.dockerignore`
# (all lowercase) at the root of the build context - confirm the file name on
# case-sensitive filesystems.

# Ignore Python cache files in all directories.
# Patterns are matched relative to the build-context root, so a bare `*.pyc`
# would only cover the top-level directory; `**/` makes the rule recursive.
**/__pycache__/
**/*.pyc

# Ignore virtual environment directory
venv/
Dockerfile ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

# Run as an unprivileged user; pip then installs into ~/.local, whose bin
# directory is put on PATH so console scripts stay reachable.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /server-slave

# Copy only the dependency manifest first so the (large) dependency layer is
# reused from the build cache whenever just the application code changes.
COPY --chown=user requirements.txt /server-slave/requirements.txt
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r /server-slave/requirements.txt

# Application source is copied last because it changes most often.
COPY --chown=user . /server-slave

EXPOSE 7000
CMD ["python", "api/app.py"]
api/app.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
"""Entry point of the API: creates the Flask app and serves it with gevent."""
import sys

from flask import Flask, request, jsonify

from gevent.pywsgi import WSGIServer

app = Flask(__name__)

# The route modules do ``from app import app``. When this file is started as a
# script it is registered as ``__main__`` rather than ``app``, so without the
# alias below that import would execute this file a second time and attach the
# routes to a second Flask instance. Registering the running module under the
# name ``app`` guarantees a single shared instance. When the module is imported
# normally (already named ``app``) this is a no-op.
sys.modules.setdefault("app", sys.modules[__name__])

# Imported after ``app`` exists because the route modules register themselves
# on it at import time.
from routes import *

if __name__ == "__main__":
    # An empty host string binds to all interfaces; port 7000 matches the
    # port exposed by the Dockerfile.
    http_server = WSGIServer(('', 7000), app)
    http_server.serve_forever()
api/middleware/auth.py ADDED
File without changes
api/routes/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from .clusterRoute import *
2
+ from .embeddingRoute import *
api/routes/clusterRoute.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from app import app
2
+ from flask import request, jsonify
3
@app.route('/api/cluster', methods=['POST'])
def cluster():
    """Cluster the posted embeddings with DBSCAN and return their labels.

    Expects a JSON object with an ``embeddings`` field (presumably a list of
    equal-length numeric vectors - confirm against the caller).

    Returns:
        ``{"labels": [...]}`` with one integer label per embedding on success.
        ``{"error": "..."}`` with status 400 when the payload is missing,
        malformed or rejected by NumPy/scikit-learn as invalid, and with
        status 500 for any other failure.
    """
    # Imported lazily, as in the original handler, so importing the route
    # module does not pull in scikit-learn until the endpoint is used.
    from sklearn.cluster import DBSCAN
    import numpy as np

    # silent=True yields None instead of raising on a non-JSON body, so every
    # bad request ends up in the same JSON error response below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'embeddings' not in data:
        return jsonify(
            {'error': "Request body must be a JSON object with an 'embeddings' field"}
        ), 400

    try:
        embeddings_array = np.array(data['embeddings'])
        dbscan = DBSCAN(eps=0.4, min_samples=2, metric='cosine', n_jobs=-1)
        labels = dbscan.fit_predict(embeddings_array).tolist()
    except ValueError as e:
        # Ragged, empty or otherwise wrongly shaped input is a client error.
        return jsonify({'error': str(e)}), 400
    except Exception as e:
        app.logger.exception("Clustering failed")
        return jsonify({'error': str(e)}), 500

    return jsonify({'labels': labels})
api/routes/embeddingRoute.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sentence_transformers import SentenceTransformer
2
+ from flask import Flask, request, jsonify
3
+ import logging
4
+ from app import app
5
# The model is loaded once, when this module is imported, and shared by all
# requests; inference is pinned to the CPU.
# NOTE(review): trust_remote_code=True runs Python code shipped with the model
# repository - consider pinning a specific revision of the model.
embedding_model = SentenceTransformer("dangvantuan/vietnamese-document-embedding",
                                      trust_remote_code=True,
                                      device='cpu')


@app.route('/api/embedding', methods=['POST'])
def embedding():
    """Encode the posted text with the sentence-embedding model.

    Expects a JSON object with a ``text`` field, which is passed unchanged to
    ``embedding_model.encode``.

    Returns:
        ``{"embedding": [...]}`` on success.
        ``{"error": "..."}`` with status 400 when the payload is missing or
        malformed, and with status 500 when encoding fails.
    """
    # silent=True yields None instead of raising on a non-JSON body, so the
    # caller always receives a JSON error rather than an HTML error page.
    data = request.get_json(silent=True)
    logging.info(data)
    if not isinstance(data, dict) or 'text' not in data:
        return jsonify(
            {'error': "Request body must be a JSON object with a 'text' field"}
        ), 400

    try:
        vector = embedding_model.encode(data['text']).tolist()
    except Exception as e:
        logging.exception("Embedding failed")
        return jsonify({'error': str(e)}), 500

    return jsonify({'embedding': vector})
20
+
requirements.txt ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate==1.2.1
2
+ aiohappyeyeballs==2.4.4
3
+ aiohttp==3.11.11
4
+ aiosignal==1.3.2
5
+ async-timeout==5.0.1
6
+ attrs==24.3.0
7
+ blinker==1.9.0
8
+ certifi==2024.12.14
9
+ cffi==1.17.1
10
+ charset-normalizer==3.4.1
11
+ click==8.1.8
12
+ colorama==0.4.6
13
+ coloredlogs==15.0.1
14
+ datasets==3.2.0
15
+ dill==0.3.8
16
+ evaluate==0.4.3
17
+ filelock==3.16.1
18
+ Flask==3.1.0
19
+ flatbuffers==24.12.23
20
+ frozenlist==1.5.0
21
+ fsspec==2024.9.0
22
+ gevent==24.11.1
23
+ greenlet==3.1.1
24
+ huggingface-hub==0.27.1
25
+ humanfriendly==10.0
26
+ idna==3.10
27
+ importlib_metadata==8.5.0
28
+ itsdangerous==2.2.0
29
+ Jinja2==3.1.5
30
+ joblib==1.4.2
31
+ MarkupSafe==3.0.2
32
+ mpmath==1.3.0
33
+ multidict==6.1.0
34
+ multiprocess==0.70.16
35
+ networkx==3.2.1
36
+ numpy==2.0.2
37
+ onnx==1.17.0
38
+ onnxruntime==1.19.2
39
+ optimum==1.23.3
40
+ packaging==24.2
41
+ pandas==2.2.3
42
+ pillow==11.1.0
43
+ propcache==0.2.1
44
+ protobuf==5.29.3
45
+ psutil==6.1.1
46
+ pyarrow==18.1.0
47
+ pycparser==2.22
48
+ pyreadline3==3.5.4
49
+ python-dateutil==2.9.0.post0
50
+ pytz==2024.2
51
+ PyYAML==6.0.2
52
+ regex==2024.11.6
53
+ requests==2.32.3
54
+ safetensors==0.5.2
55
+ scikit-learn==1.6.1
56
+ scipy==1.13.1
57
+ sentence-transformers==3.3.1
58
+ six==1.17.0
59
+ sympy==1.13.1
60
+ threadpoolctl==3.5.0
61
+ tokenizers==0.20.3
62
+ torch==2.5.1
63
+ tqdm==4.67.1
64
+ transformers==4.46.3
65
+ typing_extensions==4.12.2
66
+ tzdata==2024.2
67
+ urllib3==2.3.0
68
+ Werkzeug==3.1.3
69
+ xxhash==3.5.0
70
+ yarl==1.18.3
71
+ zipp==3.21.0
72
+ zope.event==5.0
73
+ zope.interface==7.2