diff --git a/ml/synthesis/Dockerfile b/ml/synthesis/Dockerfile index f155e712b..63d4881f5 100644 --- a/ml/synthesis/Dockerfile +++ b/ml/synthesis/Dockerfile @@ -4,7 +4,7 @@ # # The following build stages structure is chosen: # -# ubuntu:20.04 <-- dependencies <-- build <-- ml-script +# ubuntu:22.04 <-- dependencies <-- build <-- ml-script # ^--- development <-- development-gpu # # - "build" builds just NITTA itself @@ -15,10 +15,20 @@ # NITTA source code is not added to the image during "dependencies" build stage to avoid breaking build cache for "build" stage which depends on it. # Create another stage deriving from "dependencies" if practice shows we really need source code in the image instead of a bind mount. -# if you change the ubuntu version, don't forget to update the CUDA repository in development containers below -FROM ubuntu:20.04 as dependencies - -# constructing environment in a layered fashion to utilize docker build cache +# if you change the Ubuntu version, don't forget to update the CUDA repository in the development-gpu target below +FROM ubuntu:22.04 as dependencies + +# haskell tooling versions here are for ghcup +# can be: specific like "9.6.2" | "latest" | "recommended" +# default: recommended +# GHC 9.6.2 is needed for the recent fourmolu (0.13.1.0) +ARG GLOBAL_GHC_VER=9.6.2 +ARG STACK_VER=recommended +# tried to get a fourmolu-0.12.0.0 like in CI, but stackage has no snapshot with such a version at the moment of writing: +# https://www.stackage.org/package/fourmolu/snapshots +# this resolver has fourmolu-0.13.1.0 +ARG GLOBAL_STACK_RESOLVER=nightly-2023-08-23 +ARG PYTHON_VER=3.11 # ---- non-root user setup ---- # Non-root is needed: @@ -42,14 +52,16 @@ RUN groupadd ${HOST_GID:+--gid $HOST_GID} devuser \ USER devuser RUN echo "cd /app" >> "/home/devuser/.profile" USER root -# sudo is needed only for development images (and is a security risk), -# so we'll omit its installation in images for non-interactive containers and install it later in 
development images +# since user will run everything as a non-root user, sudo is required. +# only for development images though, and it's a security risk, so we'll omit its installation in images for +# non-interactive containers and install it later in development images. # -- initializing app dir -- # it will be application's workdir and repo root RUN mkdir /app && chown -R devuser:devuser /app WORKDIR /app + # ---- install build tools ---- # noninteractive is needed to avoid interactive prompts during apt-get install from packages like tzdata ENV DEBIAN_FRONTEND=noninteractive @@ -69,20 +81,24 @@ RUN apt-get install -yq software-properties-common \ # -- haskell -- # GHCup (https://www.haskell.org/ghcup/) is used to install GHC, Stack, etc. -# easier to control versions, practically required for development images -RUN apt-get install -yq build-essential curl libffi-dev libffi7 libgmp-dev libgmp10 libncurses-dev libncurses5 libtinfo5 +# easier to control versions, practically required for development images. +# !!! Currently recommended GHCup version is installed: it's better to have the latest version for development. +# !!! It may break stuff in the future. Pin the version if that becomes a pain. See lines with get-ghcup + ghcup-stack integration. +RUN apt-get install -yq build-essential curl libffi-dev libffi7 libgmp-dev libgmp10 libncurses-dev libncurses5 \ + libtinfo5 libnuma1 +# libnuma1 was added to fix linker errors (?) while building the image against aarch64 +# the rest is ghcup deps from https://www.haskell.org/ghcup/install/#linux-ubuntu USER devuser RUN curl --proto '=https' --tlsv1.2 -sSf https://get-ghcup.haskell.org | \ BOOTSTRAP_HASKELL_NONINTERACTIVE=1 \ - # GHC 9.4.4 is needed for fourmolu-0.10.1.0. 
can also be: latest | recommended (default) - BOOTSTRAP_HASKELL_GHC_VERSION=9.4.4 \ - BOOTSTRAP_HASKELL_STACK_VERSION=recommended \ + BOOTSTRAP_HASKELL_GHC_VERSION=${GLOBAL_GHC_VER} \ + BOOTSTRAP_HASKELL_STACK_VERSION=${STACK_VER} \ # DO NOT prepend ghcup's binaries dir to PATH in ~/.bashrc - # we'll do it later manually in a way that includes non-interactive ssh shells (required for development) + # we'll do it later manually in a way that includes non-interactive ssh shells (required for development with vscode) # BOOTSTRAP_HASKELL_ADJUST_BASHRC=1 \ sh -# adding ghcup binaries to PATH +# adding ghcup binaries to PATH for image building ENV PATH="/home/devuser/.ghcup/bin:$PATH" # setting up ghcup-stack integration via stack hooks, see https://www.haskell.org/ghcup/guide/#stack-integration @@ -94,24 +110,9 @@ RUN mkdir -p ~/.stack/hooks/ \ && stack config set system-ghc false --global \ # do not fallback to stack ghc installation if ghcup is not available && stack config set install-ghc false --global \ - # update resolver in the implicit global stack project (needed for GHC 9.4.4 for fourmolu-0.10.1.0) - && stack --resolver nightly-2023-04-09 clean - -# setting up PATH in .bashrc to include ghcup binaries (minding non-interactive ssh shells, see inserted comment below) -# (sed inserts given text before the matched comment (which should be there by default) -# if we start a line with #, it gets stripped by Dockerfile parser, so putting \n at the beginning of each line to implement comments -RUN sed -i '/# If not running interactively,/i # ---- custom non-interactive section start ---- \ -\n\ -\n# We do it here BEFORE checking if the shell is interactive because \ -\n# VS Code runs its server in a non-interactive non-login shell created by ssh \ -\n# and those vars are still needed. .bashrc is still executed in this case. 
\ -\n\ -\n# ghcup-env \ -\n[ -f "/home/devuser/.ghcup/env" ] && source "/home/devuser/.ghcup/env" \ -\n\ -\n# ---- custom non-interactive section end ---- \ -\n\ -' ~/.bashrc + # update resolver in the implicit global stack project (needed for the recent fourmolu) + # using this instead of "stack config" to **initialize** the global project (it's not created yet) + && stack --resolver ${GLOBAL_STACK_RESOLVER} clean RUN echo "GHCUP: $(ghcup --version)" \ && echo "STACK: $(stack --version)" \ @@ -127,14 +128,43 @@ RUN curl -sL https://deb.nodesource.com/setup_16.x | bash \ && npm i -g yarn \ && yarn --version +USER devuser + # -- ml -- -RUN python3 -V \ - && apt-get install python3-pip -yq \ - && python3 -m pip install --upgrade pip \ - && pip3 -V +USER root +# Python build dependencies (pyenv requires them to build Python from source) +RUN apt-get install -yq build-essential libssl-dev zlib1g-dev libbz2-dev libreadline-dev libsqlite3-dev curl \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev libffi-dev liblzma-dev +USER devuser +# - pyenv - +# add PYENV_GIT_TAG=v2.3.24 before "bash" to pin the version +# skipping pinning since it's better for development to have the latest version, although it may break stuff +RUN curl https://pyenv.run | bash +ENV PYENV_ROOT="/home/devuser/.pyenv" +ENV PATH="/home/devuser/.pyenv/shims:/home/devuser/.pyenv/bin:$PATH" +RUN pyenv install ${PYTHON_VER} \ + && pyenv global ${PYTHON_VER} \ + && pip install --upgrade pip \ + && echo "=== === === PYTHON VERSION: " $(python -V) \ + && echo "=== === === PIP VERSION: " $(pip -V) +# - poetry - +# add POETRY_VERSION=1.6.1 before "python3" to pin the version +RUN curl -sSL https://install.python-poetry.org | python3 - +# poetry is installed to ~/.local/bin, which is not in PATH by default +ENV PATH="/home/devuser/.local/bin:$PATH" +# it's also added to PATH in .bashrc (see above) for VS Code Server's non-login non-interactive shell to see it +RUN echo "=== === === POETRY VERSION: " 
$(poetry --version) \ + # going to use pyenv's "system" python as venv since we're in a container + # should have no problems with permissions since pyenv's python is located in devuser's home + && poetry config virtualenvs.create false # ---- get build dependencies ready ---- +USER root +# preparing stack build for aarch64 +RUN apt-get install -yq clang lldb lld libnuma-dev +ENV LANG=C.UTF-8 + USER devuser # -- haskell -- @@ -148,22 +178,53 @@ RUN yarn install # -- ml -- WORKDIR /app/ml/synthesis -COPY --chown=devuser:devuser ml/synthesis/requirements.txt ./ -RUN pip3 install --user -r requirements.txt \ - && pip3 cache purge - +COPY --chown=devuser:devuser ml/synthesis/poetry.lock ml/synthesis/pyproject.toml ./ +RUN poetry install --no-root --only main --compile # ---- finalizing ---- +# set up PATH in .bashrc to include ghcup and pyenv binaries, yarn and poetry global installs, etc. (with non-interactive ssh shells in mind, see inserted comment below) +# (sed inserts given text before the matched comment (which should be there by default) +# if we start a line with #, it gets stripped by Dockerfile parser, so putting \n at the beginning of each line to implement comments +RUN sed -i '/# If not running interactively,/i # ---- custom non-interactive section start ---- \ +\n\ +\n# We do it here BEFORE checking if the shell is interactive because \ +\n# VS Code runs its server in a non-interactive non-login shell created by ssh \ +\n# and those vars are still needed. .bashrc is still executed in this case. 
\ +\n\ +\n# ghcup-env \ +\n[ -f "/home/devuser/.ghcup/env" ] && source "/home/devuser/.ghcup/env" \ +\n\ +\nexport PYTHONPATH=/app/ml/synthesis/src:\ +\n\ +\n# adding local bin (poetry, first of all) to PATH\ +\nexport PATH="$HOME/.local/bin:$PATH"\ +\n\ +\n# pyenv config: https://github.com/pyenv/pyenv\ +\nexport PYENV_ROOT="$HOME/.pyenv"\ +\ncommand -v pyenv >/dev/null || export PATH="$PYENV_ROOT/bin:$PATH"\ +\neval "$(pyenv init -)"\ +\n\ +\n# yarn global installs location\ +\nexport PATH="$(yarn global bin):$PATH"\ +\n\ +\n# ---- custom non-interactive section end ---- \ +\n\ +' ~/.bashrc + +# solve buffering issues with python output for development and CI images +ENV PYTHONUNBUFFERED=1 + WORKDIR /app USER devuser + + # ----------------------------- FROM dependencies AS development -# ---- target for spinning up a container with development envoronment --- -# Bind mount of repo root to workdir is expected here. -# Using them to map live source code from the host filesystem straight into the container. +# A target to use for spinning up a container with development environment (without GPU support for ML). +# Bind mount of repo root to workdir is expected here! See related README.md for more info and usage examples. +# Using bind mounts to map live source code from the host filesystem straight into the container. # Container will pick changes made during development without docker image rebuilds. # Existing container data will be obscured (https://docs.docker.com/storage/bind-mounts/#mount-into-a-non-empty-directory-on-the-container), this is fine. 
@@ -175,34 +236,44 @@ RUN apt-get install -yq sudo \ && usermod -aG sudo devuser \ # it will be passwordless && echo "devuser ALL=(ALL) NOPASSWD:ALL" | (EDITOR='tee -a' visudo) +# remove the sudo tutorial on startup +USER devuser +RUN touch ~/.sudo_as_admin_successful +USER root # unminimizing the system is required to get a decent teminal experience and dev environment # installing common handy dev tools here too # git is already installed and updated earlier RUN yes | unminimize \ - && apt-get install -yq man-db htop vim + && apt-get install -yq man-db htop vim screen # installing NITTA dev dependencies -RUN apt-get install -yq iverilog gtkwave libtinfo-dev - +RUN apt-get install -yq iverilog gtkwave USER devuser +RUN yarn global add markdownlint-cli2 + +# pre-building stack test dependencies not to waste developer's time on that later +RUN stack build --test --only-dependencies +# install all python dependency groups (including dev ones) +WORKDIR /app/ml/synthesis +RUN poetry install --no-root --compile + +# cd ~ to use the global stack project with the recent resolver and required (recent) fourmolu version WORKDIR /home/devuser -# this used to fail with linker errors without libtinfo-dev installed while building ghc-lib-parser-9.0.2 -# fourmolu-0.10.1.0 is pinned because of https://github.com/ryukzak/nitta/issues/242 -RUN stack install hlint fourmolu-0.10.1.0 +RUN stack install hlint fourmolu WORKDIR /app USER root -# installing and configuring ssh server for remote debugging -RUN apt-get install -yq screen openssh-server +# installing and configuring ssh server for remote development +RUN apt-get install -yq openssh-server RUN mkdir /var/run/sshd \ - && sed -i 's/#PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config + && sed -i 's/#PasswordAuthentication yes/PasswordAuthentication no/' /etc/ssh/sshd_config USER devuser -# GPG commit signing can be troublesome in dev containers -# proposed workaround is a prolonged gpg-agent passphrase 
timeout and a helper script to enter the passphrase via terminal -# default is 3 hours = 3600 * 3 = 10800 seconds +# GPG commit signing can be troublesome in dev containers - IDEs may not support showing passphrase prompts. +# Proposed workaround is a prolonged gpg-agent passphrase timeout and a helper script to enter the passphrase via +# terminal. Default timeout specified here is 3 hours (10800 seconds). ARG GPG_PASSPHRASE_CACHE_TTL=10800 RUN mkdir -p ~/.gnupg \ && echo "use-agent" >> ~/.gnupg/gpg.conf \ @@ -210,25 +281,19 @@ RUN mkdir -p ~/.gnupg \ && echo "max-cache-ttl ${GPG_PASSPHRASE_CACHE_TTL}" >> ~/.gnupg/gpg-agent.conf \ && echo "#!/bin/bash\ \nexport GPG_TTY=\$(tty)\ -\necho test | gpg --sign > /dev/null" >> ~/passphr.sh \ +\necho anystring | gpg --sign > /dev/null" >> ~/passphr.sh \ && chmod +x ~/passphr.sh \ && chmod 600 ~/.gnupg/* \ && chmod 700 ~/.gnupg # needed for python code in docker-entrypoint-dev.sh -RUN pip3 install --user shutup +RUN pip install --user shutup -# prevent conflicts with previously build artifacts +# prevent conflicts with previous build artifacts RUN stack clean -# adding PYTHONPATH to .bashrc, including non-interactive ssh shells (like one spawned by vscode) -RUN sed -i '/# ---- custom non-interactive section end ----/i export PYTHONPATH=/app/ml/synthesis/src:$PYTHONPATH\n' ~/.bashrc +RUN echo "alias pass=~/passphr.sh" >> ~/.bash_aliases -RUN echo "alias python=python3" >> ~/.bash_aliases \ - && echo "alias pip=pip3" >> ~/.bash_aliases \ - && echo "alias pass=~/passphr.sh" >> ~/.bash_aliases \ - && echo "PATH=$PATH:/home/devuser/.local/bin" >> ~/.profile - ENTRYPOINT ["ml/synthesis/docker-entrypoint-dev.sh"] @@ -236,31 +301,28 @@ ENTRYPOINT ["ml/synthesis/docker-entrypoint-dev.sh"] FROM development as development-gpu -# ---- includes GPU support (tensorflow-gpu) for development container (possible only on Linux / Windows-WSL2 as of 2023.02)--- +# ---- includes GPU support (tensorflow) for development container (possible only 
on Linux / Windows-WSL2 as of 2023.02)--- USER root # ---- check if expected tensorflow version matches requirements.txt ---- # this should be in sync with version in requirements.txt -ARG TENSORFLOW_VER=2.12.0 +ARG TENSORFLOW_VER=2.12.* # those should be in sync with TENSORFLOW_VER, taken from https://www.tensorflow.org/install/source#gpu -# cuDNN must be a 8.6 for this tensorflow version, but we also need to specify minor version. you can get it from "Available libcudnn8 versions" output below. +# cuDNN version must correspond to chosen tensorflow version, but we also need to specify minor version. +# you can get it from "Available libcudnn8 versions" output below. ARG CUDNN_VER=8.6.0.163 ARG CUDA_VER=11.8 -RUN [ $(grep "tensorflow~=$TENSORFLOW_VER" ml/synthesis/requirements.txt | wc -l) = "1" ] || { echo "Tensorflow version mismatch, cannot continue. Tensorflow version was updated in requirements.txt, but not in the Dockerfile. Grab cuDNN and CUDA versions from https://www.tensorflow.org/install/source#gpu and update the Dockerfile near this check."; exit 1; } && \ - echo "Tensorflow version matches requirements.txt, reinstalling tensorflow $TENSORFLOW_VER with NVIDIA GPU support (cuDNN $CUDNN_VER, CUDA $CUDA_VER)." +RUN [ $(grep "tensorflow = \"${TENSORFLOW_VER}" ml/synthesis/pyproject.toml | wc -l) = "1" ] || { echo "Tensorflow version mismatch, cannot continue. Tensorflow version was updated in pyproject.toml, but not in the Dockerfile. Grab cuDNN and CUDA versions from https://www.tensorflow.org/install/source#gpu and update the Dockerfile near this check."; exit 1; } && \ + echo "Tensorflow version matches pyproject.toml installing dependencies for NVIDIA GPU support (cuDNN $CUDNN_VER, CUDA $CUDA_VER)." 
# ---- installing GPUs-specific dependencies ---- -# -- replacing tensorflow with tensorflow-gpu -- -RUN pip3 uninstall -y tensorflow \ - && pip3 install --user "tensorflow-gpu~=$TENSORFLOW_VER" - # -- installing CUDA Toolkit required for tensorflow-gpu -- RUN apt-get install -yq wget \ - && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-keyring_1.0-1_all.deb \ + && wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.0-1_all.deb \ && dpkg -i cuda-keyring_1.0-1_all.deb \ && rm cuda-keyring_1.0-1_all.deb \ && apt-get update -yq @@ -314,5 +376,4 @@ FROM build AS ml-script COPY --chown=devuser:devuser examples examples/ COPY --chown=devuser:devuser ml ml/ ENV PYTHONPATH=/app/ml/synthesis/src -ENV PYTHONUNBUFFERED=1 -ENTRYPOINT ["python3"] +ENTRYPOINT ["python"] diff --git a/ml/synthesis/docker-entrypoint-dev.sh b/ml/synthesis/docker-entrypoint-dev.sh index 7839779dd..0b04517b4 100755 --- a/ml/synthesis/docker-entrypoint-dev.sh +++ b/ml/synthesis/docker-entrypoint-dev.sh @@ -10,38 +10,27 @@ if [ ! -f "$ssh_key_location" ]; then echo "New key can be found at /$ssh_key_location, it will be automatically authorized in the container on every launch." fi mkdir -p "$HOME/.ssh" -cat "$ssh_key_location.pub" >> ~/.ssh/authorized_keys - -jupyter_port=${JUPYTER_PORT:-8888} -jupyter_token_filepath=ml/synthesis/.dev/jupyter_token -if [ ! -f "$jupyter_token_filepath" ]; then - jupyter_token=$(python3 -c "import secrets; print(secrets.token_hex(24))") - echo "$jupyter_token" > "$jupyter_token_filepath" -else - jupyter_token=$(cat "$jupyter_token_filepath") -fi +# create authorized_keys if the file doesn't exist +[ ! 
-f ~/.ssh/authorized_keys ] && touch ~/.ssh/authorized_keys +# append the key to authorized_keys if it's not already there +grep -qxF "$(cat "$ssh_key_location.pub")" ~/.ssh/authorized_keys || cat "$ssh_key_location.pub" >> ~/.ssh/authorized_keys # make profile settings available in this script [ -f ~/.profile ] && . ~/.profile # start a ssh server as a screen daemon screen -dmS sshd sudo -s /usr/sbin/sshd -D -echo "SSH into this container if needed for remote debugging in IDEs: ssh -i $ssh_key_location -p 2222 -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" $(whoami)@localhost" - -# start a jupyter notebook server as a screen daemon -screen -dmS jupyter-notebook jupyter notebook --port $jupyter_port --NotebookApp.token="$jupyter_token" --ip 0.0.0.0 -echo "Jupyter Notebook should be available at: http://localhost:$jupyter_port/?token=$jupyter_token" +echo "SSH into this container for remote development in IDEs: ssh -i $ssh_key_location -p 31032 $(whoami)@localhost" # print info about GPUs available to Tensorflow -python3 -c "import shutup, os; shutup.please(); os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'; import tensorflow as tf; gpu_count = len(tf.config.list_physical_devices('GPU')); print(f'Num of GPUs available to Tensorflow: {gpu_count}')" +python -c "import shutup, os; shutup.please(); os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'; import tensorflow as tf; gpu_count = len(tf.config.list_physical_devices('GPU')); print(f'Num of GPUs available to Tensorflow: {gpu_count}')" +# remove defunct screens screen -wipe > /dev/null echo "Available screens (attach with screen -r ):" screen -ls | sed '1d;$d' -# remove the sudo tutorial on startup -touch ~/.sudo_as_admin_successful # fallback to shell for interactivity TERM=xterm-256color /bin/bash