Skip to content

Commit

Permalink
adding cuda parts of pdlp: part one
Browse files Browse the repository at this point in the history
  • Loading branch information
galabovaa committed Aug 26, 2024
1 parent 7ce96cb commit 0ac0cec
Show file tree
Hide file tree
Showing 11 changed files with 902 additions and 0 deletions.
13 changes: 13 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,19 @@ endif()
# emscripten
option(EMSCRIPTEN_HTML "Emscripten HTML output" OFF)

option(CUPDLP_GPU "Build pdlp with nvidia" OFF)
message(STATUS "Build pdlp with nvidia: ${CUPDLP_GPU}")

if (CUPDLP_GPU)
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR})
message(NOTICE "Set build with CUDA ${BUILD_CUDA}")
include(FindCUDAConf.cmake)
set(CUPLDP_CPU OFF)
else()
set(CUPLDP_CPU ON)
set(CUDA_LIBRARY-NOTFOUND true)
endif()

if (BUILD_CXX)
# Default Build Type to be Release
get_property(isMultiConfig GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
Expand Down
33 changes: 33 additions & 0 deletions FindCUDAConf.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@

set(CUDA_LIBRARY-NOTFOUND, OFF)
message(NOTICE "Finding CUDA environment")
message(NOTICE " - CUDA Home detected at $ENV{CUDA_HOME}")
set(CMAKE_CUDA_ARCHITECTURES "all")
set(CMAKE_CUDA_PATH "$ENV{CUDA_HOME}")
set(CMAKE_CUDA_COMPILER "${CMAKE_CUDA_PATH}/bin/nvcc")

enable_language(CUDA)

find_library(CUDA_LIBRARY_ART
NAMES cudart
HINTS "${CMAKE_CUDA_PATH}/lib64/"
REQUIRED
)
find_library(CUDA_LIBRARY_SPS
NAMES cusparse
HINTS "${CMAKE_CUDA_PATH}/lib64/"
REQUIRED
)
find_library(CUDA_LIBRARY_BLS
NAMES cublas
HINTS "${CMAKE_CUDA_PATH}/lib64/"
REQUIRED
)
if (${CUDA_LIBRARY-NOTFOUND})
message(WARNING " - CUDA Libraries not detected at $ENV{CUDA_HOME}")
else ()
message(NOTICE " - CUDA Libraries detected at $ENV{CUDA_HOME}")
set(CUDA_LIBRARY ${CUDA_LIBRARY_ART} ${CUDA_LIBRARY_SPS} ${CUDA_LIBRARY_BLS})
message(NOTICE " - :${CUDA_LIBRARY}")
endif ()

5 changes: 5 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,11 @@ else()
target_sources(highs PRIVATE ${sources} ${headers} ${win_version_file})
target_include_directories(highs PRIVATE ${include_dirs})

# Optional Cuda
if (CUPDLP_GPU)
add_subdirectory(cupdlp)
endif()

if(MSVC)
list(APPEND highs_compile_opts
"/bigobj" # Allow big object
Expand Down
14 changes: 14 additions & 0 deletions src/pdlp/cupdlp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
if (${CUDA_LIBRARY-NOTFOUND})
message(NOTICE "- CPU version PDLP")
# target_compile_definitions(cupdlp
# PUBLIC
# -DCUPDLP_CPU
# )
target_link_libraries(highs m)
else()
add_subdirectory(cuda)
message(NOTICE "- GPU version PDLP")
target_include_directories(highs PUBLIC "/usr/local/cuda/include")
target_link_libraries(highs PRIVATE cudalin ${CUDA_LIBRARY} m)
set_target_properties(highs PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif ()
30 changes: 30 additions & 0 deletions src/pdlp/cupdlp/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
enable_language(CXX CUDA)

add_library(cudalin SHARED
${CUPDLP_INCLUDE_DIR}/cuda/cupdlp_cuda_kernels.cu
${CUPDLP_INCLUDE_DIR}/cuda/cupdlp_cuda_kernels.cuh
${CUPDLP_INCLUDE_DIR}/cuda/cupdlp_cudalinalg.cuh
${CUPDLP_INCLUDE_DIR}/cuda/cupdlp_cudalinalg.cu
)

set_target_properties(cudalin PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(cudalin PUBLIC "/usr/local/cuda/include")
target_compile_definitions(cudalin
PUBLIC
# If the debug configuration pass the DEBUG define to the compiler
$<$<CONFIG:Debug>:HIGHS_DEBUG>
)

target_link_libraries(cudalin ${CUDA_LIBRARY} m)

# add a test
add_executable(testcudalin test_cuda_linalg.c)
add_executable(testcublas test_cublas.c)

set_target_properties(testcudalin PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
#target_include_directories(cudalinalg PRIVATE ${CUPDLP_INCLUDE_DIR}/cuda)
target_link_libraries(testcudalin PRIVATE cudalin ${CUDA_LIBRARY})

set_target_properties(testcublas PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(testcublas PRIVATE cudalin ${CUDA_LIBRARY})

121 changes: 121 additions & 0 deletions src/pdlp/cupdlp/cuda/cupdlp_cuda_kernels.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
#include "cupdlp_cuda_kernels.cuh"

dim3 cuda_gridsize(cupdlp_int n) {
cupdlp_int k = (n - 1) / CUPDLP_BLOCK_SIZE + 1;
cupdlp_int x = k;
cupdlp_int y = 1;
if (x > 65535) {
x = ceil(sqrt(k));
y = (n - 1) / (x * CUPDLP_BLOCK_SIZE) + 1;
}
dim3 d = {x, y, 1};
return d;
}

__global__ void element_wise_dot_kernel(cupdlp_float *x, const cupdlp_float *y,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] *= y[i];
}

__global__ void element_wise_div_kernel(cupdlp_float *x, const cupdlp_float *y,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] /= y[i];
}

__global__ void element_wise_projlb_kernel(cupdlp_float *x,
const cupdlp_float *lb,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] < lb[i] ? lb[i] : x[i];
}

__global__ void element_wise_projub_kernel(cupdlp_float *x,
const cupdlp_float *ub,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] > ub[i] ? ub[i] : x[i];
}

__global__ void element_wise_projSamelb_kernel(cupdlp_float *x,
const cupdlp_float lb,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] <= lb ? lb : x[i];
}

__global__ void element_wise_projSameub_kernel(cupdlp_float *x,
const cupdlp_float ub,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = x[i] >= ub ? ub : x[i];
}

__global__ void element_wise_initHaslb_kernal(cupdlp_float *haslb,
const cupdlp_float *lb,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) haslb[i] = lb[i] > bound ? 1.0 : 0.0;
}

__global__ void element_wise_initHasub_kernal(cupdlp_float *hasub,
const cupdlp_float *ub,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) hasub[i] = ub[i] < bound ? 1.0 : 0.0;
}

__global__ void element_wise_filterlb_kernal(cupdlp_float *x,
const cupdlp_float *lb,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = lb[i] > bound ? lb[i] : 0.0;
}

__global__ void element_wise_filterub_kernal(cupdlp_float *x,
const cupdlp_float *ub,
const cupdlp_float bound,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = ub[i] < bound ? ub[i] : 0.0;
}

__global__ void init_cuda_vec_kernal(cupdlp_float *x, const cupdlp_float val,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) x[i] = val;
}

//xUpdate = x - dPrimalStep * (cost - ATy)
__global__ void primal_grad_step_kernal(cupdlp_float *xUpdate,
const cupdlp_float *x,
const cupdlp_float *cost,
const cupdlp_float *ATy,
const cupdlp_float dPrimalStep,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) xUpdate[i] = x[i] - dPrimalStep * (cost[i] - ATy[i]);
}

//yUpdate = y + dDualStep * (b -2AxUpdate + Ax)
__global__ void dual_grad_step_kernal(cupdlp_float *yUpdate,
const cupdlp_float *y,
const cupdlp_float *b,
const cupdlp_float *Ax,
const cupdlp_float *AxUpdate,
const cupdlp_float dDualStep,
const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) yUpdate[i] = y[i] + dDualStep * (b[i] - 2 * AxUpdate[i] + Ax[i]);
}

// z = x - y
__global__ void naive_sub_kernal(cupdlp_float *z, const cupdlp_float *x,
const cupdlp_float *y, const cupdlp_int len) {
cupdlp_int i = blockIdx.x * blockDim.x + threadIdx.x;
if (i < len) z[i] = x[i] - y[i];
}
148 changes: 148 additions & 0 deletions src/pdlp/cupdlp/cuda/cupdlp_cuda_kernels.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
#ifndef CUPDLP_CUDA_KERNALS_H
#define CUPDLP_CUDA_KERNALS_H

#include "cuda_runtime.h"
#define CUPDLP_BLOCK_SIZE 512

#ifndef SFLOAT
#ifdef DLONG
typedef long long cupdlp_int;
#else
typedef int cupdlp_int;
#endif
typedef double cupdlp_float;
#define CudaComputeType CUDA_R_64F
#else
#define CudaComputeType CUDA_R_32F
#endif

#define CHECK_CUDA(func) \
{ \
cudaError_t status = (func); \
if (status != cudaSuccess) { \
printf("CUDA API failed at line %d of %s with error: %s (%d)\n", \
__LINE__, __FILE__, cudaGetErrorString(status), status); \
return EXIT_FAILURE; \
} \
}

#define CHECK_CUSPARSE(func) \
{ \
cusparseStatus_t status = (func); \
if (status != CUSPARSE_STATUS_SUCCESS) { \
printf("CUSPARSE API failed at line %d of %s with error: %s (%d)\n", \
__LINE__, __FILE__, cusparseGetErrorString(status), status); \
return EXIT_FAILURE; \
} \
}

#define CHECK_CUBLAS(func) \
{ \
cublasStatus_t status = (func); \
if (status != CUBLAS_STATUS_SUCCESS) { \
printf("CUBLAS API failed at line %d of %s with error: %s (%d)\n", \
__LINE__, __FILE__, cublasGetStatusString(status), status); \
return EXIT_FAILURE; \
} \
}

#define CUPDLP_FREE_VEC(x) \
{ \
cudaFree(x); \
x = cupdlp_NULL; \
}

#define CUPDLP_COPY_VEC(dst, src, type, size) \
cudaMemcpy(dst, src, sizeof(type) * (size), cudaMemcpyDefault)

#define CUPDLP_INIT_VEC(var, size) \
{ \
cusparseStatus_t status = \
cudaMalloc((void **)&var, (size) * sizeof(typeof(*var))); \
if (status != CUSPARSE_STATUS_SUCCESS) { \
printf("CUSPARSE API failed at line %d with error: %s (%d)\n", __LINE__, \
cusparseGetErrorString(status), status); \
goto exit_cleanup; \
} \
}
#define CUPDLP_INIT_ZERO_VEC(var, size) \
{ \
cusparseStatus_t status = \
cudaMalloc((void **)&var, (size) * sizeof(typeof(*var))); \
if (status != CUSPARSE_STATUS_SUCCESS) { \
printf("CUSPARSE API failed at line %d with error: %s (%d)\n", __LINE__, \
cusparseGetErrorString(status), status); \
goto exit_cleanup; \
} \
status = cudaMemset(var, 0, (size) * sizeof(typeof(*var))); \
if (status != CUSPARSE_STATUS_SUCCESS) { \
printf("CUSPARSE API failed at line %d with error: %s (%d)\n", __LINE__, \
cusparseGetErrorString(status), status); \
goto exit_cleanup; \
} \
}
#define CUPDLP_ZERO_VEC(var, type, size) \
cudaMemset(var, 0, sizeof(type) * (size))

dim3 cuda_gridsize(cupdlp_int n);

__global__ void element_wise_dot_kernel(cupdlp_float *x, const cupdlp_float *y,
const cupdlp_int len);

__global__ void element_wise_div_kernel(cupdlp_float *x, const cupdlp_float *y,
const cupdlp_int len);

__global__ void element_wise_projlb_kernel(cupdlp_float *x,
const cupdlp_float *lb,
const cupdlp_int len);

__global__ void element_wise_projub_kernel(cupdlp_float *x,
const cupdlp_float *ub,
const cupdlp_int len);

__global__ void element_wise_projSamelb_kernel(cupdlp_float *x,
const cupdlp_float lb,
const cupdlp_int len);

__global__ void element_wise_projSameub_kernel(cupdlp_float *x,
const cupdlp_float ub,
const cupdlp_int len);

__global__ void element_wise_initHaslb_kernal(cupdlp_float *haslb,
const cupdlp_float *lb,
const cupdlp_float bound,
const cupdlp_int len);

__global__ void element_wise_initHasub_kernal(cupdlp_float *hasub,
const cupdlp_float *ub,
const cupdlp_float bound,
const cupdlp_int len);

__global__ void element_wise_filterlb_kernal(cupdlp_float *x,
const cupdlp_float *lb,
const cupdlp_float bound,
const cupdlp_int len);

__global__ void element_wise_filterub_kernal(cupdlp_float *x,
const cupdlp_float *ub,
const cupdlp_float bound,
const cupdlp_int len);

__global__ void init_cuda_vec_kernal(cupdlp_float *x, const cupdlp_float val,
const cupdlp_int len);

__global__ void primal_grad_step_kernal(cupdlp_float *xUpdate,
const cupdlp_float *x,
const cupdlp_float *cost,
const cupdlp_float *ATy,
const cupdlp_float dPrimalStep,
const cupdlp_int len);

__global__ void dual_grad_step_kernal(
cupdlp_float *yUpdate, const cupdlp_float *y, const cupdlp_float *b,
const cupdlp_float *Ax, const cupdlp_float *AxUpdate,
const cupdlp_float dDualStep, const cupdlp_int len);

__global__ void naive_sub_kernal(cupdlp_float *z, const cupdlp_float *x,
const cupdlp_float *y, const cupdlp_int len);
#endif
Loading

0 comments on commit 0ac0cec

Please sign in to comment.