# Base image: Ubuntu 22.04 LTS, pinned by tag so the apt package names and
# versions used below stay predictable.
FROM ubuntu:22.04

# Identity of the unprivileged user the image runs as.
ENV NB_USER=jovyan \
    NB_UID=1000

# HOME is set in a separate instruction on purpose: variables assigned in an
# ENV instruction are only visible to the instructions that follow it, so
# ${NB_USER} would not expand if both were set together.
ENV HOME=/home/${NB_USER} \
    JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/

# Build-time only: keeps apt/dpkg (e.g. tzdata) from prompting during the
# build without leaking the setting into the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Install OS packages, the Java 17 toolchain and Python 3.11 in one layer.
# `apt-get update` and `apt-get install` share a single RUN so the install
# never runs against a stale, separately cached package index, and the index
# is deleted in the same layer so it does not persist in the image.
# Packages are listed one per line in alphabetical order for easy diffing.
# NOTE(review): recommended packages are still installed; switching to
# --no-install-recommends needs a check that requirements.txt still builds.
RUN apt-get update \
    && apt-get install -y \
        ant \
        bash \
        ca-certificates-java \
        gcc \
        gnupg \
        graphviz \
        libfreetype6-dev \
        libhdf5-dev \
        libpng-dev \
        libzmq3-dev \
        openjdk-17-jdk \
        pkg-config \
        python3 \
        python3-dev \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv \
        rsync \
        software-properties-common \
        tar \
        tzdata \
        unzip \
        wget \
    # Regenerate the certificate stores now that the Java CA package is present.
    && update-ca-certificates -f \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Make JAVA_HOME and the JDK's bin directory available to login shells by
# appending two export lines to /etc/profile. The single quotes keep
# $JAVA_HOME and $PATH literal so they are expanded when the profile is
# sourced, not at build time.
RUN printf '%s\n' \
        'export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64/' \
        'export PATH=$JAVA_HOME/bin:$PATH' \
        >> /etc/profile

# Add the unprivileged account (name from NB_USER, UID from NB_UID) and
# create its home directory.
RUN useradd --create-home --uid "${NB_UID}" "${NB_USER}"

# Every instruction from here on runs as the unprivileged user.
USER ${NB_USER}

# Runtime environment for that user: its home directory, its ~/.local/bin
# placed first on PATH, and Python 3.11 as the interpreter for both the
# PySpark driver and the PySpark workers.
ENV HOME=/home/${NB_USER} \
    PATH=/home/${NB_USER}/.local/bin:${PATH} \
    PYSPARK_PYTHON=/usr/bin/python3.11 \
    PYSPARK_DRIVER_PYTHON=/usr/bin/python3.11

# Use the user's home directory as the working directory.
WORKDIR ${HOME}

# Upgrade pip, then install the Python dependencies for Python 3.11.
# requirements.txt is copied on its own, before the rest of the build
# context, so this dependency layer is only rebuilt when the file changes.
# --no-cache-dir keeps pip's download cache out of the image layers.
RUN python3.11 -m pip install --no-cache-dir --upgrade pip
COPY requirements.txt /tmp/requirements.txt
RUN python3.11 -m pip install --no-cache-dir -r /tmp/requirements.txt

# Add the build context (the application sources) to the user's home
# directory, owned by the unprivileged user.
COPY --chown=${NB_USER}:${NB_USER} . ${HOME}/

# Port the Streamlit server is started on (matches --server.port below).
EXPOSE 7860/tcp

# Launch the Streamlit app from the working directory, listening on all
# interfaces on port 7860.
ENTRYPOINT ["streamlit", "run", "app.py", \
            "--server.port=7860", "--server.address=0.0.0.0"]