zhichyu committed on
Commit
0db29df
·
1 Parent(s): 6d4da5b

release with CI (#3891)

Browse files

### What problem does this PR solve?

Refactor Dockerfile files.
Release with CI.

### Type of change

- [x] Refactoring

.github/workflows/release.yml ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: release
2
+
3
+ on:
4
+ schedule:
5
+ - cron: '0 13 * * *' # This schedule runs every 13:00:00Z(21:00:00+08:00)
6
+ # The "create tags" trigger is specifically focused on the creation of new tags, while the "push tags" trigger is activated when tags are pushed, including both new tag creations and updates to existing tags.
7
+ create:
8
+ tags:
9
+ - "v*.*.*" # normal release
10
+ - "nightly" # the only one mutable tag
11
+
12
+ # https://docs.github.com/en/actions/using-jobs/using-concurrency
13
+ concurrency:
14
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
15
+ cancel-in-progress: true
16
+
17
+ jobs:
18
+ release:
19
+ runs-on: [ "ubuntu-latest" ]
20
+ steps:
21
+ - name: Ensure workspace ownership
22
+ run: echo "chown -R $USER $GITHUB_WORKSPACE" && sudo chown -R $USER $GITHUB_WORKSPACE
23
+
24
+ # https://github.com/actions/checkout/blob/v3/README.md
25
+ - name: Check out code
26
+ uses: actions/checkout@v4
27
+ with:
28
+ ssh-key: ${{ secrets.MY_DEPLOY_KEY }}
29
+
30
+ - name: Prepare release body
31
+ run: |
32
+ if [[ $GITHUB_EVENT_NAME == 'create' ]]; then
33
+ RELEASE_TAG=${GITHUB_REF#refs/tags/}
34
+ if [[ $RELEASE_TAG == 'nightly' ]]; then
35
+ PRERELEASE=true
36
+ else
37
+ PRERELEASE=false
38
+ fi
39
+ echo "Workflow triggered by create tag: $RELEASE_TAG"
40
+ else
41
+ RELEASE_TAG=nightly
42
+ PRERELEASE=true
43
+ echo "Workflow triggered by schedule"
44
+ fi
45
+ echo "RELEASE_TAG=$RELEASE_TAG" >> $GITHUB_ENV
46
+ echo "PRERELEASE=$PRERELEASE" >> $GITHUB_ENV
47
+ RELEASE_DATETIME=$(date --rfc-3339=seconds)
48
+ cat <<EOF > release_template.md
49
+ Release $RELEASE_TAG created from $GITHUB_SHA at $RELEASE_DATETIME
50
+ EOF
51
+ envsubst < release_template.md > release_body.md
52
+
53
+ - name: Move the existing mutable tag
54
+ # https://github.com/softprops/action-gh-release/issues/171
55
+ run: |
56
+ if [[ $GITHUB_EVENT_NAME == 'schedule' ]]; then
57
+ # Determine if a given tag exists and matches a specific Git commit.
58
+ # actions/checkout@v4 fetch-tags doesn't work when triggered by schedule
59
+ git fetch --tags
60
+ if [ "$(git rev-parse -q --verify "refs/tags/$RELEASE_TAG")" = "$GITHUB_SHA" ]; then
61
+ echo "mutable tag $RELEASE_TAG exists and matches $GITHUB_SHA"
62
+ else
63
+ git tag -f $RELEASE_TAG $GITHUB_SHA
64
+ git push -f origin $RELEASE_TAG:refs/tags/$RELEASE_TAG
65
+ echo "created/moved mutable tag $RELEASE_TAG to $GITHUB_SHA"
66
+ fi
67
+ fi
68
+
69
+ - name: Set up QEMU
70
+ uses: docker/setup-qemu-action@v3
71
+
72
+ - name: Set up Docker Buildx
73
+ uses: docker/setup-buildx-action@v3
74
+
75
+ # https://github.com/marketplace/actions/docker-login
76
+ - name: Login to Docker Hub
77
+ uses: docker/login-action@v3
78
+ with:
79
+ username: infiniflow
80
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
81
+
82
+ # https://github.com/marketplace/actions/build-and-push-docker-images
83
+ - name: Build and push full image
84
+ uses: docker/build-push-action@v6
85
+ with:
86
+ context: .
87
+ push: true
88
+ # Action inputs are plain strings, so list-like inputs are given as delimited text, not YAML sequences.
89
+ tags: infiniflow/ragflow:${{ env.RELEASE_TAG }}
90
+ file: Dockerfile
91
+ # Target platforms for the pushed image, as a single comma-separated string.
92
+ # RELEASE_TAG above is exported to the job environment by the "Prepare release body" step.
93
+ platforms: linux/amd64,linux/arm64
94
+
95
+ # https://github.com/marketplace/actions/build-and-push-docker-images
96
+ - name: Build and push slim image
97
+ uses: docker/build-push-action@v6
98
+ with:
99
+ context: .
100
+ push: true
101
+ # Action inputs are plain strings, so list-like inputs are given as delimited text, not YAML sequences.
102
+ # Same Dockerfile as the full image; the "-slim" tag suffix distinguishes the two variants.
103
+ tags: infiniflow/ragflow:${{ env.RELEASE_TAG }}-slim
104
+ file: Dockerfile
105
+ # LIGHTEN=1 selects the slim variant: the Dockerfile only unpacks the embedding models when LIGHTEN is 0.
106
+ build-args: LIGHTEN=1
107
+ # Target platforms for the pushed image, as a single comma-separated string.
108
+ platforms: linux/amd64,linux/arm64
109
+
110
+ - name: Build ragflow-sdk
111
+ if: startsWith(github.ref, 'refs/tags/v')
112
+ run: | # only for v* release tags; apt needs sudo because steps here do not run as root
113
+ sudo apt update && sudo apt install -y pipx && \
114
+ pipx install poetry && \
115
+ cd sdk/python && \
116
+ poetry build
117
+
118
+ - name: Publish package distributions to PyPI
119
+ if: startsWith(github.ref, 'refs/tags/v')
120
+ uses: pypa/gh-action-pypi-publish@release/v1
121
+ with:
122
+ packages-dir: sdk/python/dist/  # "poetry build" ran inside sdk/python, so the artifacts are in its dist/ directory
123
+ password: ${{ secrets.PYPI_API_TOKEN }}
124
+ verbose: true
.github/workflows/tests.yml CHANGED
@@ -52,9 +52,8 @@ jobs:
52
  - name: Build ragflow:dev-slim
53
  run: |
54
  RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
55
- cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data ${RUNNER_WORKSPACE_PREFIX}/libssl*.deb ${RUNNER_WORKSPACE_PREFIX}/tika-server*.jar* ${RUNNER_WORKSPACE_PREFIX}/chrome* ${RUNNER_WORKSPACE_PREFIX}/cl100k_base.tiktoken .
56
  sudo docker pull ubuntu:22.04
57
- sudo docker build --progress=plain -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
58
 
59
  - name: Build ragflow:dev
60
  run: |
 
52
  - name: Build ragflow:dev-slim
53
  run: |
54
  RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
 
55
  sudo docker pull ubuntu:22.04
56
+ sudo docker build --progress=plain --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
57
 
58
  - name: Build ragflow:dev
59
  run: |
Dockerfile CHANGED
@@ -3,37 +3,57 @@ FROM ubuntu:22.04 AS base
3
  USER root
4
  SHELL ["/bin/bash", "-c"]
5
 
6
- ENV LIGHTEN=0
 
7
 
8
  WORKDIR /ragflow
9
 
10
- RUN rm -f /etc/apt/apt.conf.d/docker-clean \
11
- && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
12
-
13
- RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
14
- apt update && apt-get --no-install-recommends install -y ca-certificates
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- # Setup apt mirror site
17
- RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
 
20
  apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
21
- libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \
22
- && rm -rf /var/lib/apt/lists/*
23
-
24
- RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
25
- && pipx install poetry \
26
- && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
27
 
28
- # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
29
- # aspose-slides on linux/arm64 is unavailable
30
- RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
31
- --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
32
- if [ "$(uname -m)" = "x86_64" ]; then \
33
- dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
34
- elif [ "$(uname -m)" = "aarch64" ]; then \
35
- dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
36
- fi
37
 
38
  ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
39
  ENV PATH=/root/.local/bin:$PATH
@@ -45,7 +65,7 @@ ENV POETRY_REQUESTS_TIMEOUT=15
45
  ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
46
 
47
  # nodejs 12.22 on Ubuntu 22.04 is too old
48
- RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
49
  curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
50
  apt purge -y nodejs npm && \
51
  apt autoremove && \
@@ -53,6 +73,26 @@ RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked
53
  apt install -y nodejs cargo && \
54
  rm -rf /var/lib/apt/lists/*
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # builder stage
57
  FROM base AS builder
58
  USER root
@@ -62,7 +102,7 @@ WORKDIR /ragflow
62
  # install dependencies from poetry.lock file
63
  COPY pyproject.toml poetry.toml poetry.lock ./
64
 
65
- RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
66
  if [ "$LIGHTEN" == "1" ]; then \
67
  poetry install --no-root; \
68
  else \
@@ -71,20 +111,12 @@ RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sh
71
 
72
  COPY web web
73
  COPY docs docs
74
- RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
75
  cd web && npm install --force && npm run build
76
 
77
  COPY .git /ragflow/.git
78
 
79
- RUN current_commit=$(git rev-parse --short HEAD); \
80
- last_tag=$(git describe --tags --abbrev=0); \
81
- commit_count=$(git rev-list --count "$last_tag..HEAD"); \
82
- version_info=""; \
83
- if [ "$commit_count" -eq 0 ]; then \
84
- version_info=$last_tag; \
85
- else \
86
- version_info="$current_commit($last_tag~$commit_count)"; \
87
- fi; \
88
  if [ "$LIGHTEN" == "1" ]; then \
89
  version_info="$version_info slim"; \
90
  else \
@@ -104,49 +136,6 @@ ENV VIRTUAL_ENV=/ragflow/.venv
104
  COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
105
  ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
106
 
107
- # Install python packages' dependencies
108
- # cv2 requires libGL.so.1
109
- RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
110
- apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
111
- rm -rf /var/lib/apt/lists/*
112
-
113
- # Copy models downloaded via download_deps.py
114
- RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
115
- RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
116
- tar --exclude='.*' -cf - \
117
- /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
118
- /huggingface.co/InfiniFlow/deepdoc \
119
- | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
120
- RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
121
- tar -cf - \
122
- /huggingface.co/BAAI/bge-large-zh-v1.5 \
123
- /huggingface.co/BAAI/bge-reranker-v2-m3 \
124
- /huggingface.co/maidalun1020/bce-embedding-base_v1 \
125
- /huggingface.co/maidalun1020/bce-reranker-base_v1 \
126
- | tar -xf - --strip-components=2 -C /root/.ragflow
127
-
128
- # Copy nltk data downloaded via download_deps.py
129
- COPY nltk_data /root/nltk_data
130
-
131
- # https://github.com/chrismattmann/tika-python
132
- # This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
133
- COPY tika-server-standard-3.0.0.jar /ragflow/tika-server-standard.jar
134
- COPY tika-server-standard-3.0.0.jar.md5 /ragflow/tika-server-standard.jar.md5
135
- ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar"
136
-
137
- # Copy cl100k_base
138
- COPY cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
139
-
140
- # Add dependencies of selenium
141
- RUN --mount=type=bind,source=chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
142
- unzip /chrome-linux64.zip && \
143
- mv chrome-linux64 /opt/chrome && \
144
- ln -s /opt/chrome/chrome /usr/local/bin/
145
- RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
146
- unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
147
- mv chromedriver /usr/local/bin/ && \
148
- rm -f /usr/bin/google-chrome
149
-
150
  ENV PYTHONPATH=/ragflow/
151
 
152
  COPY web web
 
3
  USER root
4
  SHELL ["/bin/bash", "-c"]
5
 
6
+ ARG LIGHTEN=0
7
+ ENV LIGHTEN=${LIGHTEN}
8
 
9
  WORKDIR /ragflow
10
 
11
+ # Copy models downloaded via download_deps.py
12
+ RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
13
+ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
14
+ tar --exclude='.*' -cf - \
15
+ /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
16
+ /huggingface.co/InfiniFlow/deepdoc \
17
+ | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
18
+ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/huggingface.co,target=/huggingface.co \
19
+ if [ "$LIGHTEN" == "0" ]; then \
20
+ (tar -cf - \
21
+ /huggingface.co/BAAI/bge-large-zh-v1.5 \
22
+ /huggingface.co/BAAI/bge-reranker-v2-m3 \
23
+ /huggingface.co/maidalun1020/bce-embedding-base_v1 \
24
+ /huggingface.co/maidalun1020/bce-reranker-base_v1 \
25
+ | tar -xf - --strip-components=2 -C /root/.ragflow) \
26
+ fi
27
 
28
+ # https://github.com/chrismattmann/tika-python
29
+ # This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
30
+ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
31
+ cp -r /deps/nltk_data /root/ && \
32
+ cp /deps/tika-server-standard-3.0.0.jar /deps/tika-server-standard-3.0.0.jar.md5 /ragflow/ && \
33
+ cp /deps/cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
34
+
35
+ ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard-3.0.0.jar"
36
+
37
+ # Setup apt
38
+ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
39
+ sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list && \
40
+ rm -f /etc/apt/apt.conf.d/docker-clean && \
41
+ echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache && \
42
+ apt update && apt --no-install-recommends install -y ca-certificates && \
43
+ rm -rf /var/lib/apt/lists/*
44
 
45
+ # cv2 requires libGL.so.1
46
+ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
47
  apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
48
+ libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git nginx libgl1 vim less && \
49
+ rm -rf /var/lib/apt/lists/*
 
 
 
 
50
 
51
+ RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
52
+ pip3 config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
53
+ pipx install poetry && \
54
+ pipx runpip poetry config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && \
55
+ pipx runpip poetry config set global.trusted-host pypi.tuna.tsinghua.edu.cn && \
56
+ /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
 
 
 
57
 
58
  ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
59
  ENV PATH=/root/.local/bin:$PATH
 
65
  ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
66
 
67
  # nodejs 12.22 on Ubuntu 22.04 is too old
68
+ RUN --mount=type=cache,id=ragflow_apt,target=/var/cache/apt,sharing=locked \
69
  curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
70
  apt purge -y nodejs npm && \
71
  apt autoremove && \
 
73
  apt install -y nodejs cargo && \
74
  rm -rf /var/lib/apt/lists/*
75
 
76
+ # Add dependencies of selenium
77
+ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
78
+ unzip /chrome-linux64.zip && \
79
+ mv chrome-linux64 /opt/chrome && \
80
+ ln -s /opt/chrome/chrome /usr/local/bin/
81
+ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
82
+ unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
83
+ mv chromedriver /usr/local/bin/ && \
84
+ rm -f /usr/bin/google-chrome
85
+
86
+ # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
87
+ # aspose-slides on linux/arm64 is unavailable
88
+ RUN --mount=type=bind,from=infiniflow/ragflow_deps:latest,source=/,target=/deps \
89
+ if [ "$(uname -m)" = "x86_64" ]; then \
90
+ dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
91
+ elif [ "$(uname -m)" = "aarch64" ]; then \
92
+ dpkg -i /deps/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
93
+ fi
94
+
95
+
96
  # builder stage
97
  FROM base AS builder
98
  USER root
 
102
  # install dependencies from poetry.lock file
103
  COPY pyproject.toml poetry.toml poetry.lock ./
104
 
105
+ RUN --mount=type=cache,id=ragflow_poetry,target=/root/.cache/pypoetry,sharing=locked \
106
  if [ "$LIGHTEN" == "1" ]; then \
107
  poetry install --no-root; \
108
  else \
 
111
 
112
  COPY web web
113
  COPY docs docs
114
+ RUN --mount=type=cache,id=ragflow_npm,target=/root/.npm,sharing=locked \
115
  cd web && npm install --force && npm run build
116
 
117
  COPY .git /ragflow/.git
118
 
119
+ RUN version_info=$(git describe --tags --match=v* --dirty); \
 
 
 
 
 
 
 
 
120
  if [ "$LIGHTEN" == "1" ]; then \
121
  version_info="$version_info slim"; \
122
  else \
 
136
  COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
137
  ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  ENV PYTHONPATH=/ragflow/
140
 
141
  COPY web web
Dockerfile.deps ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # This builds an image that contains the resources needed by Dockerfile
2
+ #
3
+ FROM ubuntu:22.04
4
+
5
+ # Copy resources downloaded via download_deps.py
6
+ COPY chromedriver-linux64-121-0-6167-85 chrome-linux64-121-0-6167-85 cl100k_base.tiktoken libssl1.1_1.1.1f-1ubuntu2_amd64.deb libssl1.1_1.1.1f-1ubuntu2_arm64.deb tika-server-standard-3.0.0.jar tika-server-standard-3.0.0.jar.md5 libssl*.deb /
7
+
8
+ COPY nltk_data /nltk_data
9
+
10
+ COPY huggingface.co /huggingface.co
Dockerfile.slim DELETED
@@ -1,163 +0,0 @@
1
- # base stage
2
- FROM ubuntu:22.04 AS base
3
- USER root
4
- SHELL ["/bin/bash", "-c"]
5
-
6
- ENV LIGHTEN=1
7
-
8
- WORKDIR /ragflow
9
-
10
- RUN rm -f /etc/apt/apt.conf.d/docker-clean \
11
- && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
12
-
13
- RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
14
- apt update && apt-get --no-install-recommends install -y ca-certificates
15
-
16
- # Setup apt mirror site
17
- RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list
18
-
19
- RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
20
- apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
21
- libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \
22
- && rm -rf /var/lib/apt/lists/*
23
-
24
- RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
25
- && pipx install poetry \
26
- && /root/.local/bin/poetry self add poetry-plugin-pypi-mirror
27
-
28
- # https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
29
- # aspose-slides on linux/arm64 is unavailable
30
- RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
31
- --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
32
- if [ "$(uname -m)" = "x86_64" ]; then \
33
- dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
34
- elif [ "$(uname -m)" = "aarch64" ]; then \
35
- dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
36
- fi
37
-
38
- ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
39
- ENV PATH=/root/.local/bin:$PATH
40
- # Configure Poetry
41
- ENV POETRY_NO_INTERACTION=1
42
- ENV POETRY_VIRTUALENVS_IN_PROJECT=true
43
- ENV POETRY_VIRTUALENVS_CREATE=true
44
- ENV POETRY_REQUESTS_TIMEOUT=15
45
- ENV POETRY_PYPI_MIRROR_URL=https://pypi.tuna.tsinghua.edu.cn/simple/
46
-
47
- # nodejs 12.22 on Ubuntu 22.04 is too old
48
- RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
49
- curl -fsSL https://deb.nodesource.com/setup_20.x | bash - && \
50
- apt purge -y nodejs npm && \
51
- apt autoremove && \
52
- apt update && \
53
- apt install -y nodejs cargo && \
54
- rm -rf /var/lib/apt/lists/*
55
-
56
- # builder stage
57
- FROM base AS builder
58
- USER root
59
-
60
- WORKDIR /ragflow
61
-
62
- COPY .git /ragflow/.git
63
-
64
- RUN current_commit=$(git rev-parse --short HEAD); \
65
- last_tag=$(git describe --tags --abbrev=0); \
66
- commit_count=$(git rev-list --count "$last_tag..HEAD"); \
67
- version_info=""; \
68
- if [ "$commit_count" -eq 0 ]; then \
69
- version_info=$last_tag; \
70
- else \
71
- version_info="$current_commit($last_tag~$commit_count)"; \
72
- fi; \
73
- if [ "$LIGHTEN" == "1" ]; then \
74
- version_info="$version_info slim"; \
75
- else \
76
- version_info="$version_info full"; \
77
- fi; \
78
- echo "RAGFlow version: $version_info"; \
79
- echo $version_info > /ragflow/VERSION
80
-
81
- COPY web web
82
- COPY docs docs
83
- RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
84
- cd web && npm install --force && npm run build
85
-
86
- # install dependencies from poetry.lock file
87
- COPY pyproject.toml poetry.toml poetry.lock ./
88
-
89
- RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
90
- if [ "$LIGHTEN" == "1" ]; then \
91
- poetry install --no-root; \
92
- else \
93
- poetry install --no-root --with=full; \
94
- fi
95
-
96
- # production stage
97
- FROM base AS production
98
- USER root
99
-
100
- WORKDIR /ragflow
101
-
102
- COPY --from=builder /ragflow/VERSION /ragflow/VERSION
103
-
104
- # Install python packages' dependencies
105
- # cv2 requires libGL.so.1
106
- RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
107
- apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
108
- rm -rf /var/lib/apt/lists/*
109
-
110
- COPY web web
111
- COPY api api
112
- COPY conf conf
113
- COPY deepdoc deepdoc
114
- COPY rag rag
115
- COPY agent agent
116
- COPY graphrag graphrag
117
- COPY pyproject.toml poetry.toml poetry.lock ./
118
-
119
- # Copy models downloaded via download_deps.py
120
- RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
121
- RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
122
- tar --exclude='.*' -cf - \
123
- /huggingface.co/InfiniFlow/text_concat_xgb_v1.0 \
124
- /huggingface.co/InfiniFlow/deepdoc \
125
- | tar -xf - --strip-components=3 -C /ragflow/rag/res/deepdoc
126
-
127
- # Copy nltk data downloaded via download_deps.py
128
- COPY nltk_data /root/nltk_data
129
-
130
- # https://github.com/chrismattmann/tika-python
131
- # This is the only way to run python-tika without internet access. Without this set, the default is to check the tika version and pull latest every time from Apache.
132
- COPY tika-server-standard-3.0.0.jar /ragflow/tika-server-standard.jar
133
- COPY tika-server-standard-3.0.0.jar.md5 /ragflow/tika-server-standard.jar.md5
134
- ENV TIKA_SERVER_JAR="file:///ragflow/tika-server-standard.jar"
135
-
136
- # Copy cl100k_base
137
- COPY cl100k_base.tiktoken /ragflow/9b5ad71b2ce5302211f9c61530b329a4922fc6a4
138
-
139
- # Add dependencies of selenium
140
- RUN --mount=type=bind,source=chrome-linux64-121-0-6167-85,target=/chrome-linux64.zip \
141
- unzip /chrome-linux64.zip && \
142
- mv chrome-linux64 /opt/chrome && \
143
- ln -s /opt/chrome/chrome /usr/local/bin/
144
- RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedriver-linux64.zip \
145
- unzip -j /chromedriver-linux64.zip chromedriver-linux64/chromedriver && \
146
- mv chromedriver /usr/local/bin/ && \
147
- rm -f /usr/bin/google-chrome
148
-
149
- # Copy compiled web pages
150
- COPY --from=builder /ragflow/web/dist /ragflow/web/dist
151
-
152
- # Copy Python environment and packages
153
- ENV VIRTUAL_ENV=/ragflow/.venv
154
- COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
155
- ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
156
-
157
- ENV PYTHONPATH=/ragflow/
158
-
159
- COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
160
- COPY docker/entrypoint.sh ./entrypoint.sh
161
- RUN chmod +x ./entrypoint.sh
162
-
163
- ENTRYPOINT ["./entrypoint.sh"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
README.md CHANGED
@@ -272,9 +272,7 @@ This image is approximately 1 GB in size and relies on external LLM and embeddin
272
  ```bash
273
  git clone https://github.com/infiniflow/ragflow.git
274
  cd ragflow/
275
- pip3 install huggingface-hub nltk
276
- python3 download_deps.py
277
- docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
278
  ```
279
 
280
  ## 🔧 Build a Docker image including embedding models
@@ -284,8 +282,6 @@ This image is approximately 9 GB in size. As it includes embedding models, it re
284
  ```bash
285
  git clone https://github.com/infiniflow/ragflow.git
286
  cd ragflow/
287
- pip3 install huggingface-hub nltk
288
- python3 download_deps.py
289
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
290
  ```
291
 
 
272
  ```bash
273
  git clone https://github.com/infiniflow/ragflow.git
274
  cd ragflow/
275
+ docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
 
 
276
  ```
277
 
278
  ## 🔧 Build a Docker image including embedding models
 
282
  ```bash
283
  git clone https://github.com/infiniflow/ragflow.git
284
  cd ragflow/
 
 
285
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
286
  ```
287
 
README_id.md CHANGED
@@ -247,9 +247,7 @@ Image ini berukuran sekitar 1 GB dan bergantung pada aplikasi LLM eksternal dan
247
  ```bash
248
  git clone https://github.com/infiniflow/ragflow.git
249
  cd ragflow/
250
- pip3 install huggingface-hub nltk
251
- python3 download_deps.py
252
- docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
253
  ```
254
 
255
  ## 🔧 Membangun Docker Image Termasuk Model Embedding
@@ -259,8 +257,6 @@ Image ini berukuran sekitar 9 GB. Karena sudah termasuk model embedding, ia hany
259
  ```bash
260
  git clone https://github.com/infiniflow/ragflow.git
261
  cd ragflow/
262
- pip3 install huggingface-hub nltk
263
- python3 download_deps.py
264
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
265
  ```
266
 
 
247
  ```bash
248
  git clone https://github.com/infiniflow/ragflow.git
249
  cd ragflow/
250
+ docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
 
 
251
  ```
252
 
253
  ## 🔧 Membangun Docker Image Termasuk Model Embedding
 
257
  ```bash
258
  git clone https://github.com/infiniflow/ragflow.git
259
  cd ragflow/
 
 
260
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
261
  ```
262
 
README_ja.md CHANGED
@@ -228,9 +228,7 @@ RAGFlow はデフォルトで Elasticsearch を使用して全文とベクトル
228
  ```bash
229
  git clone https://github.com/infiniflow/ragflow.git
230
  cd ragflow/
231
- pip3 install huggingface-hub nltk
232
- python3 download_deps.py
233
- docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
234
  ```
235
 
236
  ## 🔧 ソースコードをコンパイルしたDockerイメージ(埋め込みモデルを含む)
@@ -240,8 +238,6 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
240
  ```bash
241
  git clone https://github.com/infiniflow/ragflow.git
242
  cd ragflow/
243
- pip3 install huggingface-hub nltk
244
- python3 download_deps.py
245
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
246
  ```
247
 
 
228
  ```bash
229
  git clone https://github.com/infiniflow/ragflow.git
230
  cd ragflow/
231
+ docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
 
 
232
  ```
233
 
234
  ## 🔧 ソースコードをコンパイルしたDockerイメージ(埋め込みモデルを含む)
 
238
  ```bash
239
  git clone https://github.com/infiniflow/ragflow.git
240
  cd ragflow/
 
 
241
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
242
  ```
243
 
README_ko.md CHANGED
@@ -230,9 +230,7 @@ RAGFlow 는 기본적으로 Elasticsearch 를 사용하여 전체 텍스트 및
230
  ```bash
231
  git clone https://github.com/infiniflow/ragflow.git
232
  cd ragflow/
233
- pip3 install huggingface-hub nltk
234
- python3 download_deps.py
235
- docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
236
  ```
237
 
238
  ## 🔧 소스 코드로 Docker 이미지를 컴파일합니다(임베딩 모델 포함)
@@ -242,8 +240,6 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
242
  ```bash
243
  git clone https://github.com/infiniflow/ragflow.git
244
  cd ragflow/
245
- pip3 install huggingface-hub nltk
246
- python3 download_deps.py
247
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
248
  ```
249
 
 
230
  ```bash
231
  git clone https://github.com/infiniflow/ragflow.git
232
  cd ragflow/
233
+ docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
 
 
234
  ```
235
 
236
  ## 🔧 소스 코드로 Docker 이미지를 컴파일합니다(임베딩 모델 포함)
 
240
  ```bash
241
  git clone https://github.com/infiniflow/ragflow.git
242
  cd ragflow/
 
 
243
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
244
  ```
245
 
README_zh.md CHANGED
@@ -235,9 +235,7 @@ RAGFlow 默认使用 Elasticsearch 存储文本和向量数据. 如果要切换
235
  ```bash
236
  git clone https://github.com/infiniflow/ragflow.git
237
  cd ragflow/
238
- pip3 install huggingface-hub nltk
239
- python3 download_deps.py
240
- docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
241
  ```
242
 
243
  ## 🔧 源码编译 Docker 镜像(包含 embedding 模型)
@@ -247,8 +245,6 @@ docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
247
  ```bash
248
  git clone https://github.com/infiniflow/ragflow.git
249
  cd ragflow/
250
- pip3 install huggingface-hub nltk
251
- python3 download_deps.py
252
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
253
  ```
254
 
 
235
  ```bash
236
  git clone https://github.com/infiniflow/ragflow.git
237
  cd ragflow/
238
+ docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
 
 
239
  ```
240
 
241
  ## 🔧 源码编译 Docker 镜像(包含 embedding 模型)
 
245
  ```bash
246
  git clone https://github.com/infiniflow/ragflow.git
247
  cd ragflow/
 
 
248
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
249
  ```
250
 
api/versions.py CHANGED
@@ -42,28 +42,11 @@ def get_ragflow_version() -> str:
42
  def get_closest_tag_and_count():
43
  try:
44
  # Get the current commit hash
45
- commit_id = (
46
- subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
47
  .strip()
48
  .decode("utf-8")
49
  )
50
- # Get the closest tag
51
- closest_tag = (
52
- subprocess.check_output(["git", "describe", "--tags", "--abbrev=0"])
53
- .strip()
54
- .decode("utf-8")
55
- )
56
- # Get the commit count since the closest tag
57
- process = subprocess.Popen(
58
- ["git", "rev-list", "--count", f"{closest_tag}..HEAD"],
59
- stdout=subprocess.PIPE,
60
- )
61
- commits_count, _ = process.communicate()
62
- commits_count = int(commits_count.strip())
63
-
64
- if commits_count == 0:
65
- return closest_tag
66
- else:
67
- return f"{commit_id}({closest_tag}~{commits_count})"
68
  except Exception:
69
  return "unknown"
 
42
  def get_closest_tag_and_count():
43
  try:
44
  # Describe HEAD relative to the closest v* tag; "--dirty" adds a suffix when the working tree is modified
45
+ version_info = (
46
+ subprocess.check_output(["git", "describe", "--tags", "--match=v*", "--dirty"])
47
  .strip()
48
  .decode("utf-8")
49
  )
50
+ return version_info
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  except Exception:
52
  return "unknown"
docker/.env CHANGED
@@ -81,7 +81,7 @@ SVR_HTTP_PORT=9380
81
 
82
  # The RAGFlow Docker image to download.
83
  # Defaults to the dev-slim edition, which is the RAGFlow Docker image without embedding models.
84
- RAGFLOW_IMAGE=infiniflow/ragflow:dev-slim
85
  #
86
  # To download the RAGFlow Docker image with embedding models, uncomment the following line instead:
87
  # RAGFLOW_IMAGE=infiniflow/ragflow:dev
 
81
 
82
  # The RAGFlow Docker image to download.
83
  # Defaults to the dev-slim edition, which is the RAGFlow Docker image without embedding models.
84
+ RAGFLOW_IMAGE=infiniflow/ragflow:dev
85
  #
86
  # To download the RAGFlow Docker image with embedding models, uncomment the following line instead:
87
  # RAGFLOW_IMAGE=infiniflow/ragflow:dev
docs/guides/develop/build_docker_image.mdx CHANGED
@@ -40,9 +40,7 @@ While we also test RAGFlow on ARM64 platforms, we do not plan to maintain RAGFlo
40
  ```bash
41
  git clone https://github.com/infiniflow/ragflow.git
42
  cd ragflow/
43
- pip3 install huggingface-hub nltk
44
- python3 download_deps.py
45
- docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
46
  ```
47
 
48
 
@@ -58,8 +56,6 @@ While we also test RAGFlow on ARM64 platforms, we do not plan to maintain RAGFlo
58
  ```bash
59
  git clone https://github.com/infiniflow/ragflow.git
60
  cd ragflow/
61
- pip3 install huggingface-hub nltk
62
- python3 download_deps.py
63
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
64
  ```
65
 
 
40
  ```bash
41
  git clone https://github.com/infiniflow/ragflow.git
42
  cd ragflow/
43
+ docker build --build-arg LIGHTEN=1 -f Dockerfile -t infiniflow/ragflow:dev-slim .
 
 
44
  ```
45
 
46
 
 
56
  ```bash
57
  git clone https://github.com/infiniflow/ragflow.git
58
  cd ragflow/
 
 
59
  docker build -f Dockerfile -t infiniflow/ragflow:dev .
60
  ```
61
 
download_deps.py CHANGED
@@ -1,4 +1,8 @@
1
  #!/usr/bin/env python3
 
 
 
 
2
 
3
  from huggingface_hub import snapshot_download
4
  import nltk
 
1
  #!/usr/bin/env python3
2
+ #
3
+ # Install this script's dependencies with pip3:
4
+ # pip3 install huggingface-hub nltk
5
+
6
 
7
  from huggingface_hub import snapshot_download
8
  import nltk
ubuntu.sources DELETED
@@ -1,39 +0,0 @@
1
- Types: deb
2
- URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu
3
- Suites: noble noble-updates noble-backports
4
- Components: main restricted universe multiverse
5
- Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg
6
-
7
- # 默认注释了源码镜像以提高 apt update 速度,如有需要可自行取消注释
8
- # Types: deb-src
9
- # URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu
10
- # Suites: noble noble-updates noble-backports
11
- # Components: main restricted universe multiverse
12
- # Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg
13
-
14
- # 以下安全更新软件源包含了官方源与镜像站配置,如有需要可自行修改注释切换
15
- Types: deb
16
- URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu
17
- Suites: noble-security
18
- Components: main restricted universe multiverse
19
- Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg
20
-
21
- # Types: deb-src
22
- # URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu
23
- # Suites: noble-security
24
- # Components: main restricted universe multiverse
25
- # Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg
26
-
27
- # 预发布软件源,不建议启用
28
-
29
- # Types: deb
30
- # URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu
31
- # Suites: noble-proposed
32
- # Components: main restricted universe multiverse
33
- # Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg
34
-
35
- # # Types: deb-src
36
- # # URIs: https://mirrors.tuna.tsinghua.edu.cn/ubuntu
37
- # # Suites: noble-proposed
38
- # # Components: main restricted universe multiverse
39
- # # Signed-By: /usr/share/keyrings/ubuntu-archive-keyring.gpg