FROM python:3.12

#RUN pip install --no-cache-dir torch==2.2.2
#RUN pip install --no-cache-dir flash-attn

RUN apt-get update && \
    apt-get install -y \
        bash \
        git git-lfs \
        wget curl procps gnupg \
        build-essential cmake \
        htop vim nano && \
    rm -rf /var/lib/apt/lists/*
# Install the NVIDIA CUDA keyring (official method since 2024)
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb && \
    dpkg -i cuda-keyring_1.1-1_all.deb && \
    apt-get update && \
    apt-get -y install cuda
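# Note: the `cuda` metapackage pulls in the full toolkit plus driver packages and is
# several GB. Inside a container the host driver is used anyway, so the leaner
# `cuda-toolkit` metapackage (compiler and libraries only) is usually sufficient.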

# Set CUDA environment variables
ENV PATH=/usr/local/cuda/bin:${PATH}
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
ENV CUDAToolkit_ROOT=/usr/local/cuda
ENV CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=86"
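# CMAKE_ARGS is forwarded to CMake by llama-cpp-python's scikit-build-core build:
# GGML_CUDA=on compiles the llama.cpp CUDA backend, and CUDA architecture 86
# targets Ampere GPUs (e.g. RTX 30xx / A10); adjust it for other GPU generations.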

RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"
# ^ when run as `user`, pip installs executables there
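# UID 1000 matches the non-root user convention for Hugging Face Docker Spaces;
# see https://huggingface.co/docs/hub/spaces-sdks-docker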

WORKDIR /app
COPY --chown=user . /app

# Important: disable build isolation for the llama-cpp-python build
RUN pip install --upgrade pip

# Provide the build tools manually
RUN pip install --no-cache-dir \
    setuptools \
    wheel \
    scikit-build-core[pyproject]

# Optional: build llama-cpp-python first (cacheable layer)
RUN pip install --no-cache-dir --no-build-isolation llama-cpp-python
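# With --no-build-isolation, pip builds against the setuptools/wheel/
# scikit-build-core installed above instead of a throwaway environment, so the
# slow CUDA compile driven by CMAKE_ARGS happens here once and Docker can cache
# this layer across rebuilds.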

# Then outetts (does not pull in llama-cpp-python again)
RUN pip install --no-cache-dir --no-build-isolation outetts
RUN pip install --no-cache-dir -r requirements.txt

#CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--ws", "auto", "--allow-websocket-origin", "*"]
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
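
# Build-and-run sketch (image name is illustrative; `--gpus all` assumes the
# NVIDIA Container Toolkit is installed on the host):
#   docker build -t outetts-space .
#   docker run --gpus all -p 7860:7860 outetts-space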