# content-pipeline / Dockerfile
# Last change by AK1239 (commit 725e4de): Fix NLTK data permissions and Docker user setup
FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04
# Build-time only: stops apt/debconf from prompting during package installs.
# Declared as ARG (not ENV) so it is visible to the RUN steps of this stage
# but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive
# Runtime environment:
#   PYTHONUNBUFFERED / PYTHONDONTWRITEBYTECODE - unbuffered stdout/stderr and
#     no .pyc files written inside the container.
#   PORT      - port the health check probes; matches the EXPOSE'd 7860.
#   BASE_DIR  - application root (presumably read by the app; confirm in code).
#   NLTK_DATA - location of the NLTK data directory created under /app.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860 \
    BASE_DIR=/app \
    NLTK_DATA=/app/nltk_data
# System packages: the Python 3 interpreter with pip and development headers,
# a C/C++ build toolchain, git, and curl (curl is what the HEALTHCHECK invokes).
# The apt package lists are deleted in the same layer so they never end up in
# the image.
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        g++ \
        gcc \
        git \
        make \
        python3 \
        python3-dev \
        python3-pip \
 && rm -rf /var/lib/apt/lists/*
# Unprivileged account "user" with UID 1000 and its own home directory.
RUN useradd --create-home --uid 1000 user
# Every following relative path (COPY destinations, CMD) resolves against /app.
WORKDIR /app
# Pre-create the data folders and the NLTK data folder while still root, then
# hand the whole /app tree to the unprivileged account so it can write there.
RUN mkdir -p \
        /app/data/pdfs \
        /app/data/texts \
        /app/data/index \
        /app/nltk_data \
 && chown -R user:user /app
# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY --chown=user:user requirements.txt .
# Drop root privileges: everything from here on runs as the unprivileged user.
USER user
# `pip install --user` puts console scripts into ~/.local/bin, which is not on
# the default PATH; add it so installed entry points can be invoked by name.
ENV PATH=/home/user/.local/bin:${PATH}
# Install the project requirements and Gradio in a single resolver pass, so
# Gradio's dependencies are resolved together with the constraints from
# requirements.txt instead of in a second, independent run (and extra layer).
RUN pip3 install --no-cache-dir --user -r requirements.txt gradio
# Copy the application code: the contents of the build context's app/ directory
# land directly in /app, owned by the non-root user.
# The order of the three COPY steps matters: later copies overwrite same-named
# files written by earlier ones.
COPY --chown=user:user app/ /app/
COPY --chown=user:user data/ /app/data/
# Copy the Gradio interface script; it is the file the container's CMD runs.
COPY --chown=user:user spaces_app.py /app/
# Document the listening port; 7860 is the port Hugging Face Spaces expects.
EXPOSE 7860
# Liveness probe: after a 5 s grace period, request /health on the configured
# PORT every 30 s (30 s timeout); three consecutive failures mark the container
# unhealthy. ${PORT} is expanded by the shell when the probe runs.
# NOTE(review): assumes spaces_app.py actually serves a /health route - confirm.
HEALTHCHECK --start-period=5s --interval=30s --timeout=30s --retries=3 \
    CMD curl -f http://localhost:${PORT}/health || exit 1
# Launch the Gradio app (exec form, so python3 runs as PID 1 and receives signals).
CMD ["python3", "spaces_app.py"]