From d11f8c203fe1879ac6f9785b21384da5f1b8b73f Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Tue, 10 Sep 2024 14:17:05 +0200 Subject: [PATCH] Move GPU CI pipelines from old daint to new daint --- .gitlab/includes/clang14_cuda11_pipeline.yml | 12 +++--- .gitlab/includes/common_pipeline.yml | 16 +------- .gitlab/includes/common_spack_pipeline.yml | 6 +-- .gitlab/includes/gcc12_cuda12_pipeline.yml | 40 +++++++++---------- ..._pipeline.yml => gcc13_gh200_pipeline.yml} | 24 +++++------ .gitlab/includes/gcc9_cuda11_pipeline.yml | 10 ++--- .gitlab/includes/nvhpc24_7_pipeline.yml | 30 +++++++------- .gitlab/pipelines_on_merge.yml | 2 +- .gitlab/pipelines_on_push.yml | 3 ++ 9 files changed, 65 insertions(+), 78 deletions(-) rename .gitlab/includes/{gcc13_santis_pipeline.yml => gcc13_gh200_pipeline.yml} (69%) diff --git a/.gitlab/includes/clang14_cuda11_pipeline.yml b/.gitlab/includes/clang14_cuda11_pipeline.yml index abda5a7e4..1c542e29a 100644 --- a/.gitlab/includes/clang14_cuda11_pipeline.yml +++ b/.gitlab/includes/clang14_cuda11_pipeline.yml @@ -10,10 +10,10 @@ include: .variables_clang14_cuda11_config: variables: - SPACK_ARCH: linux-ubuntu22.04-haswell + SPACK_ARCH: linux-ubuntu22.04-neoverse_v2 COMPILER: clang@14.0.6 CXXSTD: 17 - GPU_TARGET: '60' + GPU_TARGET: '90' SPACK_SPEC: "pika@main arch=$SPACK_ARCH %${COMPILER} +cuda cuda_arch=${GPU_TARGET} malloc=system cxxstd=$CXXSTD +valgrind ^boost@1.79.0 ^cuda@11.5.0 +allow-unsupported-compilers ^hwloc@2.7.0 ^valgrind ~mpi" # PIKA_WITH_CXX_LAMBDA_CAPTURE_DECLTYPE is OFF to test the fallback implementation of PIKA_FORWARD. 
@@ -28,25 +28,25 @@ include: clang14_cuda11_spack_compiler_image: extends: - .variables_clang14_cuda11_config - - .compiler_image_template_rosa + - .compiler_image_template_gh200 clang14_cuda11_spack_image: needs: [clang14_cuda11_spack_compiler_image] extends: - .variables_clang14_cuda11_config - - .dependencies_image_template_rosa + - .dependencies_image_template_gh200 clang14_cuda11_build: needs: [clang14_cuda11_spack_image] extends: - .variables_clang14_cuda11_config - - .build_template_rosa + - .build_template_gh200 .clang14_cuda11_test_common: needs: [clang14_cuda11_build] extends: - .variables_clang14_cuda11_config - - .test_common_gpu_daint_cuda + - .test_common_gh200 - .test_template clang14_cuda11_test_release: diff --git a/.gitlab/includes/common_pipeline.yml b/.gitlab/includes/common_pipeline.yml index 4b82ecdea..db30faeba 100644 --- a/.gitlab/includes/common_pipeline.yml +++ b/.gitlab/includes/common_pipeline.yml @@ -36,19 +36,7 @@ variables: variables: SLURM_PARTITION: normal -.test_common_gpu_daint_cuda: +.test_common_gh200: extends: - - .container-runner-daint-gpu - - .test_common - variables: - SLURM_PARTITION: normal - -.test_common_gpu_clariden_cuda: - extends: - - .container-runner-clariden-a100 - - .test_common - -.test_common_gpu_clariden_hip: - extends: - - .container-runner-clariden-mi200 + - .container-runner-daint-gh200 - .test_common diff --git a/.gitlab/includes/common_spack_pipeline.yml b/.gitlab/includes/common_spack_pipeline.yml index 668e748cf..650c45e0f 100644 --- a/.gitlab/includes/common_spack_pipeline.yml +++ b/.gitlab/includes/common_spack_pipeline.yml @@ -51,7 +51,7 @@ base_spack_image_x86_64: reports: dotenv: compiler.env -.compiler_image_template_santis: +.compiler_image_template_gh200: needs: [base_spack_image_aarch64] extends: [.container-builder-cscs-gh200, .compiler_image_template] @@ -74,7 +74,7 @@ base_spack_image_x86_64: reports: dotenv: dependencies.env -.dependencies_image_template_santis: 
+.dependencies_image_template_gh200: extends: [.container-builder-cscs-gh200, .dependencies_image_template] .dependencies_image_template_rosa: @@ -109,7 +109,7 @@ base_spack_image_x86_64: reports: dotenv: "$DOTENV_FILE" -.build_template_santis: +.build_template_gh200: extends: [.container-builder-cscs-gh200, .build_template] .build_template_rosa: diff --git a/.gitlab/includes/gcc12_cuda12_pipeline.yml b/.gitlab/includes/gcc12_cuda12_pipeline.yml index 44adaf185..af6c0b157 100644 --- a/.gitlab/includes/gcc12_cuda12_pipeline.yml +++ b/.gitlab/includes/gcc12_cuda12_pipeline.yml @@ -10,7 +10,7 @@ include: .variables_gcc12_cuda12_config: variables: - SPACK_ARCH: linux-ubuntu22.04-haswell + SPACK_ARCH: linux-ubuntu22.04-neoverse_v2 COMPILER: gcc@12.1.0 CXXSTD: 17 GPU_TARGET: "60" @@ -24,33 +24,31 @@ include: gcc12_cuda12_spack_compiler_image: extends: - .variables_gcc12_cuda12_config - - .compiler_image_template_rosa + - .compiler_image_template_gh200 gcc12_cuda12_spack_image: needs: [gcc12_cuda12_spack_compiler_image] extends: - .variables_gcc12_cuda12_config - - .dependencies_image_template_rosa + - .dependencies_image_template_gh200 gcc12_cuda12_build: needs: [gcc12_cuda12_spack_image] extends: - .variables_gcc12_cuda12_config - - .build_template_rosa - -## Test step currently commented as the cuda driver is too old on clariden: -## https://github.com/pika-org/pika/issues/884 -#.gcc12_cuda12_test_common: -# needs: [gcc12_cuda12_build] -# extends: -# - .variables_gcc12_cuda12_config -# - .test_common_gpu_daint_cuda -# - .test_template -# -#gcc12_cuda12_test_release: -# extends: [.gcc12_cuda12_test_common] -# image: $PERSIST_IMAGE_NAME_RELEASE -# -#gcc12_cuda12_test_debug: -# extends: [.gcc12_cuda12_test_common] -# image: $PERSIST_IMAGE_NAME_DEBUG + - .build_template_gh200 + +.gcc12_cuda12_test_common: + needs: [gcc12_cuda12_build] + extends: + - .variables_gcc12_cuda12_config + - .test_common_gh200 + - .test_template + +gcc12_cuda12_test_release: + extends: 
[.gcc12_cuda12_test_common] + image: $PERSIST_IMAGE_NAME_RELEASE + +gcc12_cuda12_test_debug: + extends: [.gcc12_cuda12_test_common] + image: $PERSIST_IMAGE_NAME_DEBUG diff --git a/.gitlab/includes/gcc13_santis_pipeline.yml b/.gitlab/includes/gcc13_gh200_pipeline.yml similarity index 69% rename from .gitlab/includes/gcc13_santis_pipeline.yml rename to .gitlab/includes/gcc13_gh200_pipeline.yml index 741110ef3..12ed991e0 100644 --- a/.gitlab/includes/gcc13_santis_pipeline.yml +++ b/.gitlab/includes/gcc13_gh200_pipeline.yml @@ -8,7 +8,7 @@ include: - local: '.gitlab/includes/common_pipeline.yml' - local: '.gitlab/includes/common_spack_pipeline.yml' -.variables_gcc13_santis_config: +.variables_gcc13_gh200_config: variables: SPACK_ARCH: linux-ubuntu22.04-neoverse_v2 COMPILER: gcc@13.1.0 @@ -19,22 +19,22 @@ include: CMAKE_FLAGS: "-DPIKA_WITH_CXX_STANDARD=$CXXSTD -DPIKA_WITH_MALLOC=system \ -DPIKA_WITH_STDEXEC=ON -DPIKA_WITH_SPINLOCK_DEADLOCK_DETECTION=ON" -gcc13_santis_spack_compiler_image: +gcc13_gh200_spack_compiler_image: extends: - - .variables_gcc13_santis_config - - .compiler_image_template_santis + - .variables_gcc13_gh200_config + - .compiler_image_template_gh200 allow_failure: true -gcc13_santis_spack_image: - needs: [gcc13_santis_spack_compiler_image] +gcc13_gh200_spack_image: + needs: [gcc13_gh200_spack_compiler_image] extends: - - .variables_gcc13_santis_config - - .dependencies_image_template_santis + - .variables_gcc13_gh200_config + - .dependencies_image_template_gh200 allow_failure: true -gcc13_santis_build: - needs: [gcc13_santis_spack_image] +gcc13_gh200_build: + needs: [gcc13_gh200_spack_image] extends: - - .variables_gcc13_santis_config - - .build_template_santis + - .variables_gcc13_gh200_config + - .build_template_gh200 allow_failure: true diff --git a/.gitlab/includes/gcc9_cuda11_pipeline.yml b/.gitlab/includes/gcc9_cuda11_pipeline.yml index fd3b1c9b6..0bfbb242c 100644 --- a/.gitlab/includes/gcc9_cuda11_pipeline.yml +++ 
b/.gitlab/includes/gcc9_cuda11_pipeline.yml @@ -10,7 +10,7 @@ include: .variables_gcc9_cuda11_config: variables: - SPACK_ARCH: linux-ubuntu22.04-haswell + SPACK_ARCH: linux-ubuntu22.04-neoverse_v2 COMPILER: gcc@9.3.0 CXXSTD: 17 GPU_TARGET: '60' @@ -22,25 +22,25 @@ include: gcc9_cuda11_spack_compiler_image: extends: - .variables_gcc9_cuda11_config - - .compiler_image_template_rosa + - .compiler_image_template_gh200 gcc9_cuda11_spack_image: needs: [gcc9_cuda11_spack_compiler_image] extends: - .variables_gcc9_cuda11_config - - .dependencies_image_template_rosa + - .dependencies_image_template_gh200 gcc9_cuda11_build: needs: [gcc9_cuda11_spack_image] extends: - .variables_gcc9_cuda11_config - - .build_template_rosa + - .build_template_gh200 .gcc9_cuda11_test_common: needs: [gcc9_cuda11_build] extends: - .variables_gcc9_cuda11_config - - .test_common_gpu_daint_cuda + - .test_common_gh200 - .test_template gcc9_cuda11_test_release: diff --git a/.gitlab/includes/nvhpc24_7_pipeline.yml b/.gitlab/includes/nvhpc24_7_pipeline.yml index 36d4a19c9..e042c7088 100644 --- a/.gitlab/includes/nvhpc24_7_pipeline.yml +++ b/.gitlab/includes/nvhpc24_7_pipeline.yml @@ -41,19 +41,17 @@ nvhpc24_7_build: - .variables_nvhpc24_7_config - .build_template_rosa -# The test step is disabled until maintenance is over. Pulling the image on compute nodes is too 
-# slow, and the image is too big. -# .nvhpc24_7_test_common: -# needs: [nvhpc24_7_build] -# extends: -# - .variables_nvhpc24_7_config -# - .test_common_gpu_daint_cuda -# - .test_template -# -# nvhpc24_7_test_release: -# extends: [.nvhpc24_7_test_common] -# image: $PERSIST_IMAGE_NAME_RELEASE -# -# nvhpc24_7_test_debug: -# extends: [.nvhpc24_7_test_common] -# image: $PERSIST_IMAGE_NAME_DEBUG +.nvhpc24_7_test_common: + needs: [nvhpc24_7_build] + extends: + - .variables_nvhpc24_7_config + - .test_common_gh200 + - .test_template + +nvhpc24_7_test_release: + extends: [.nvhpc24_7_test_common] + image: $PERSIST_IMAGE_NAME_RELEASE + +nvhpc24_7_test_debug: + extends: [.nvhpc24_7_test_common] + image: $PERSIST_IMAGE_NAME_DEBUG diff --git a/.gitlab/pipelines_on_merge.yml b/.gitlab/pipelines_on_merge.yml index 6433719a7..f0f5f17cd 100644 --- a/.gitlab/pipelines_on_merge.yml +++ b/.gitlab/pipelines_on_merge.yml @@ -14,7 +14,7 @@ include: - local: '.gitlab/includes/gcc12_pipeline.yml' - local: '.gitlab/includes/gcc12_cuda12_pipeline.yml' - local: '.gitlab/includes/gcc12_hip5_pipeline.yml' - - local: '.gitlab/includes/gcc13_santis_pipeline.yml' + - local: '.gitlab/includes/gcc13_gh200_pipeline.yml' - local: '.gitlab/includes/gcc14_pipeline.yml' - local: '.gitlab/includes/clang11_pipeline.yml' - local: '.gitlab/includes/clang12_pipeline.yml' diff --git a/.gitlab/pipelines_on_push.yml b/.gitlab/pipelines_on_push.yml index 4ad2467e7..42f7c4277 100644 --- a/.gitlab/pipelines_on_push.yml +++ b/.gitlab/pipelines_on_push.yml @@ -9,3 +9,6 @@ include: - local: '.gitlab/includes/clang14_cuda11_pipeline.yml' - local: '.gitlab/includes/gcc12_hip6_pipeline.yml' - local: '.gitlab/includes/sloc.yml' + # TODO: move to on_merge before merging + - local: '.gitlab/includes/gcc9_cuda11_pipeline.yml' + - local: '.gitlab/includes/gcc12_cuda12_pipeline.yml'