sivakorn-su committed on
Commit
db47d79
·
1 Parent(s): 03af55b

fix docker

Browse files
Files changed (3) hide show
  1. Dockerfile +28 -29
  2. app.py +5 -1
  3. models.py +4 -2
Dockerfile CHANGED
@@ -2,36 +2,36 @@ FROM nvidia/cuda:12.3.2-cudnn9-runtime-ubuntu22.04
2
 
3
  WORKDIR /app
4
 
5
- ENV DEBIAN_FRONTEND=noninteractive
6
- ENV PYTHONUNBUFFERED=1
7
- ENV PYTHONDONTWRITEBYTECODE=1
8
 
9
- # ติดตั้ง dependencies
10
  RUN apt-get update && apt-get install -y \
11
- python3.10 \
12
- python3.10-venv \
13
- python3-pip \
14
- ffmpeg \
15
- git \
16
- libsndfile1 \
17
- curl \
18
- tzdata \
19
- wget \
20
- build-essential \
21
  && ln -sf /usr/bin/python3.10 /usr/bin/python \
22
  && ln -sf /usr/bin/pip3 /usr/bin/pip \
23
  && rm -rf /var/lib/apt/lists/*
24
 
25
- # ตั้ง timezone
26
  RUN ln -fs /usr/share/zoneinfo/Asia/Bangkok /etc/localtime && \
27
  dpkg-reconfigure -f noninteractive tzdata
28
 
29
- # สร้าง directory cache ต่าง ๆ
30
  RUN mkdir -p /tmp/hf_cache /tmp/torch_cache /tmp/matplotlib /tmp/xdg_cache /tmp/home /tmp/uploads /tmp/pythainlp_data \
31
  && chmod -R 777 /tmp/hf_cache /tmp/torch_cache /tmp/matplotlib /tmp/xdg_cache /tmp/home /tmp/uploads /tmp/pythainlp_data
32
 
33
- # เพิ่ม PATH สำหรับ cuDNN 9 ให้เจอ .so
34
- ENV HUGGINGFACE_HUB_CACHE=/tmp/hf_cache \
 
 
 
 
 
 
 
 
35
  HF_HOME=/tmp/hf_cache \
36
  HF_CACHE=/tmp/hf_cache \
37
  TRANSFORMERS_CACHE=/tmp/hf_cache \
@@ -45,26 +45,25 @@ ENV HUGGINGFACE_HUB_CACHE=/tmp/hf_cache \
45
  LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH \
46
  PYTHAINLP_DATA_DIR=/tmp/pythainlp_data
47
 
48
- # ติดตั้ง Python dependencies
49
  COPY requirements.txt .
50
-
51
  RUN pip install --upgrade pip
52
 
53
- # ติดตั้ง PyTorch ที่ใช้ CUDA 12.1
54
  RUN pip install --no-cache-dir \
55
- torch==2.3.0+cu121 \
56
- torchvision==0.18.0+cu121 \
57
- torchaudio==2.3.0 \
58
  --extra-index-url https://download.pytorch.org/whl/cu121
59
 
60
- # ติดตั้ง dependencies อื่น ๆ
61
  RUN pip install --no-cache-dir -r requirements.txt
62
 
63
- # Copy source code เข้า container
64
  COPY . /app
65
 
66
- # ระบุ port ที่จะ expose
67
  EXPOSE 7860
68
 
69
- # เริ่มแอป
70
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
 
 
 
2
 
3
  WORKDIR /app
4
 
5
+ ENV DEBIAN_FRONTEND=noninteractive \
6
+ PYTHONUNBUFFERED=1 \
7
+ PYTHONDONTWRITEBYTECODE=1
8
 
9
+ # system deps
10
  RUN apt-get update && apt-get install -y \
11
+ python3.10 python3.10-venv python3-pip \
12
+ ffmpeg git libsndfile1 curl tzdata wget build-essential \
 
 
 
 
 
 
 
 
13
  && ln -sf /usr/bin/python3.10 /usr/bin/python \
14
  && ln -sf /usr/bin/pip3 /usr/bin/pip \
15
  && rm -rf /var/lib/apt/lists/*
16
 
17
+ # timezone
18
  RUN ln -fs /usr/share/zoneinfo/Asia/Bangkok /etc/localtime && \
19
  dpkg-reconfigure -f noninteractive tzdata
20
 
21
+ # caches
22
  RUN mkdir -p /tmp/hf_cache /tmp/torch_cache /tmp/matplotlib /tmp/xdg_cache /tmp/home /tmp/uploads /tmp/pythainlp_data \
23
  && chmod -R 777 /tmp/hf_cache /tmp/torch_cache /tmp/matplotlib /tmp/xdg_cache /tmp/home /tmp/uploads /tmp/pythainlp_data
24
 
25
+ # --------- IMPORTANT: limit threads to avoid libgomp spawn fail ---------
26
+ ENV OMP_NUM_THREADS=1 \
27
+ OPENBLAS_NUM_THREADS=1 \
28
+ MKL_NUM_THREADS=1 \
29
+ NUMEXPR_NUM_THREADS=1 \
30
+ VECLIB_MAXIMUM_THREADS=1 \
31
+ BLIS_NUM_THREADS=1 \
32
+ TOKENIZERS_PARALLELISM=false \
33
+ PL_DISABLE_VERSION_CHECK=1 \
34
+ HUGGINGFACE_HUB_CACHE=/tmp/hf_cache \
35
  HF_HOME=/tmp/hf_cache \
36
  HF_CACHE=/tmp/hf_cache \
37
  TRANSFORMERS_CACHE=/tmp/hf_cache \
 
45
  LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH \
46
  PYTHAINLP_DATA_DIR=/tmp/pythainlp_data
47
 
48
+ # python deps
49
  COPY requirements.txt .
 
50
  RUN pip install --upgrade pip
51
 
52
+ # torch cuda12.1
53
  RUN pip install --no-cache-dir \
54
+ torch==2.3.0+cu121 torchvision==0.18.0+cu121 torchaudio==2.3.0 \
 
 
55
  --extra-index-url https://download.pytorch.org/whl/cu121
56
 
57
+ # others
58
  RUN pip install --no-cache-dir -r requirements.txt
59
 
60
+ # app
61
  COPY . /app
62
 
 
63
  EXPOSE 7860
64
 
65
+ # run: limit workers & concurrency; set ulimit before start
66
+ CMD ["bash","-lc", "\
67
+ ulimit -u 4096 || true; \
68
+ exec uvicorn app:app --host 0.0.0.0 --port 7860 --workers 1 --limit-concurrency 8 \
69
+ "]
app.py CHANGED
@@ -9,7 +9,11 @@ import torch
9
  import uvicorn
10
  import asyncio
11
  import logging
12
-
 
 
 
 
13
  from config import UPLOAD_FOLDER, SUPABASE_URL, SUPABASE_KEY
14
  from models import pipelines, models, model_lock, load_model_bundle, overlap_pipeline
15
  from utils import (
 
9
  import uvicorn
10
  import asyncio
11
  import logging
12
+ os.environ.setdefault("OMP_NUM_THREADS", "1")
13
+ os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
14
+ os.environ.setdefault("MKL_NUM_THREADS", "1")
15
+ torch.set_num_threads(1)
16
+ torch.set_num_interop_threads(1)
17
  from config import UPLOAD_FOLDER, SUPABASE_URL, SUPABASE_KEY
18
  from models import pipelines, models, model_lock, load_model_bundle, overlap_pipeline
19
  from utils import (
models.py CHANGED
@@ -54,13 +54,15 @@ async def load_model_bundle():
54
  pipeline = Pipeline.from_pretrained(
55
  "pyannote/speaker-diarization-3.1",
56
  use_auth_token=token,
57
- cache_dir=HF_CACHE_DIR
 
58
  ).to(device_torch)
59
 
60
  overlap_pipeline = Pipeline.from_pretrained(
61
  "pyannote/overlapped-speech-detection",
62
  use_auth_token=token,
63
- cache_dir=HF_CACHE_DIR # ใช้ cache เดียวกับโมเดลอื่น
 
64
  )
65
  model_fallback_chain = [PREFERRED_MODEL] + [m for m in FALLBACK_MODELS if m != PREFERRED_MODEL]
66
 
 
54
  pipeline = Pipeline.from_pretrained(
55
  "pyannote/speaker-diarization-3.1",
56
  use_auth_token=token,
57
+ cache_dir=HF_CACHE_DIR,
58
+ revision="v3.1.1",
59
  ).to(device_torch)
60
 
61
  overlap_pipeline = Pipeline.from_pretrained(
62
  "pyannote/overlapped-speech-detection",
63
  use_auth_token=token,
64
+ cache_dir=HF_CACHE_DIR,
65
+ revision="v3.1.1",
66
  )
67
  model_fallback_chain = [PREFERRED_MODEL] + [m for m in FALLBACK_MODELS if m != PREFERRED_MODEL]
68