Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nodal Solver: Use multi-color Gauss-Seidel on GPU #4043

Merged
merged 2 commits into from
Jul 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Src/Boundary/Make.package
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
ifndef AMREX_BOUNDARY_MAKE
AMREX_BOUNDARY_MAKE := 1

CEXE_sources += AMReX_Mask.cpp AMReX_MultiMask.cpp AMReX_LO_BCTYPES.cpp

Expand Down Expand Up @@ -27,3 +29,4 @@ endif
VPATH_LOCATIONS += $(AMREX_HOME)/Src/Boundary
INCLUDE_LOCATIONS += $(AMREX_HOME)/Src/Boundary

endif
96 changes: 81 additions & 15 deletions Src/LinearSolvers/MLMG/AMReX_MLNodeLap_1D_K.H
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ void mlndlap_normalize_aa (int i, int j, int k, Array4<Real> const& x,
mlndlap_normalize_ha(i,j,k,x,sx,msk,dxinv);
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
void mlndlap_jacobi_ha (int i, int, int, Array4<Real> const& sol, Real Ax,
Array4<Real const> const& rhs, Array4<Real const> const& sx,
Array4<int const> const& msk,
Expand All @@ -208,15 +208,15 @@ void mlndlap_jacobi_ha (int i, int, int, Array4<Real> const& sol, Real Ax,
}
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_jacobi_ha (Box const& bx, Array4<Real> const& sol, Array4<Real const> const& Ax,
Array4<Real const> const& rhs, Array4<Real const> const& sx,
Array4<int const> const& msk,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
Real fac = -dxinv[0]*dxinv[0];

amrex::LoopConcurrent(bx, [=] (int i, int, int) noexcept
amrex::LoopConcurrentOnCpu(bx, [&] (int i, int, int) noexcept
{
if (msk(i,0,0)) {
sol(i,0,0) = Real(0.0);
Expand All @@ -227,23 +227,23 @@ void mlndlap_jacobi_ha (Box const& bx, Array4<Real> const& sol, Array4<Real cons
});
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
void mlndlap_jacobi_aa (int i, int j, int k, Array4<Real> const& sol, Real Ax,
Array4<Real const> const& rhs, Array4<Real const> const& sig,
Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
mlndlap_jacobi_ha(i,j,k,sol,Ax,rhs,sig,msk,dxinv);
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_jacobi_aa (Box const& bx, Array4<Real> const& sol, Array4<Real const> const& Ax,
Array4<Real const> const& rhs, Array4<Real const> const& sig,
Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
mlndlap_jacobi_ha(bx,sol,Ax,rhs,sig,msk,dxinv);
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
AMREX_GPU_DEVICE AMREX_FORCE_INLINE
void mlndlap_jacobi_c (int i, int, int, Array4<Real> const& sol, Real Ax,
Array4<Real const> const& rhs, Real sig,
Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
Expand All @@ -256,12 +256,12 @@ void mlndlap_jacobi_c (int i, int, int, Array4<Real> const& sol, Real Ax,
}
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_jacobi_c (Box const& bx, Array4<Real> const& sol, Array4<Real const> const& Ax,
Array4<Real const> const& rhs, Real sig,
Array4<int const> const& msk, GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
amrex::LoopConcurrent(bx, [=] (int i, int, int) noexcept
amrex::LoopConcurrentOnCpu(bx, [&] (int i, int, int) noexcept
{
if (msk(i,0,0)) {
sol(i,0,0) = Real(0.0);
Expand All @@ -272,7 +272,7 @@ void mlndlap_jacobi_c (Box const& bx, Array4<Real> const& sol, Array4<Real const
});
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_gauss_seidel_ha (Box const& bx, Array4<Real> const& sol,
Array4<Real const> const& rhs,
Array4<Real const> const& sx,
Expand All @@ -281,7 +281,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4<Real> const& sol,
{
Real fac = dxinv[0]*dxinv[0];

amrex::Loop(bx, [=] (int i, int, int) noexcept
amrex::LoopOnCpu(bx, [&] (int i, int, int) noexcept
{
if (msk(i,0,0)) {
sol(i,0,0) = Real(0.0);
Expand All @@ -295,7 +295,7 @@ void mlndlap_gauss_seidel_ha (Box const& bx, Array4<Real> const& sol,
});
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_gauss_seidel_aa (Box const& bx, Array4<Real> const& sol,
Array4<Real const> const& rhs,
Array4<Real const> const& sx,
Expand All @@ -305,15 +305,15 @@ void mlndlap_gauss_seidel_aa (Box const& bx, Array4<Real> const& sol,
mlndlap_gauss_seidel_ha(bx,sol,rhs,sx,msk,dxinv);
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_gauss_seidel_c (Box const& bx, Array4<Real> const& sol,
Array4<Real const> const& rhs, Real sig,
Array4<int const> const& msk,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv) noexcept
{
Real fac = dxinv[0]*dxinv[0];

amrex::Loop(bx, [=] (int i, int, int) noexcept
amrex::LoopOnCpu(bx, [&] (int i, int, int) noexcept
{
if (msk(i,0,0)) {
sol(i,0,0) = Real(0.0);
Expand All @@ -327,7 +327,7 @@ void mlndlap_gauss_seidel_c (Box const& bx, Array4<Real> const& sol,
});
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_gauss_seidel_with_line_solve_aa(Box const&, Array4<Real> const&,
Array4<Real const> const&, Array4<Real const> const&,
Array4<int const> const&, GpuArray<Real,AMREX_SPACEDIM> const&) noexcept
Expand Down Expand Up @@ -556,7 +556,7 @@ Real mlndlap_adotx_sten (int /*i*/, int /*j*/, int /*k*/, Array4<Real const> con
Array4<Real const> const&, Array4<int const> const&) noexcept
{ return Real(0.0); }

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
inline
void mlndlap_gauss_seidel_sten (Box const&, Array4<Real> const&,
Array4<Real const> const&,
Array4<Real const> const&,
Expand All @@ -575,6 +575,72 @@ void mlndlap_restriction_rap (int /*i*/, int /*j*/, int /*k*/, Array4<Real> cons
Array4<int const> const&) noexcept
{}

AMREX_GPU_DEVICE AMREX_FORCE_INLINE
int mlndlap_color (int i, int, int)
{
return i%2;
}

AMREX_GPU_DEVICE AMREX_FORCE_INLINE
void mlndlap_gscolor_ha (int i, int j, int k, Array4<Real> const& sol,
Array4<Real const> const& rhs,
Array4<Real const> const& sx,
Array4<int const> const& msk,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv, int color) noexcept
{
if (mlndlap_color(i,j,k) == color) {
if (msk(i,0,0)) {
sol(i,0,0) = Real(0.0);
} else {
Real fac = dxinv[0]*dxinv[0];

Real s0 = Real(-1.0) * fac * (sx(i-1,0,0)+sx(i,0,0));
Real Ax = sol(i-1,0,0)*fac*sx(i-1,0,0)
+ sol(i+1,0,0)*fac*sx(i ,0,0)
+ sol(i ,0,0)*s0;
sol(i,0,0) += (rhs(i,0,0) - Ax) / s0;
}
}
}

AMREX_GPU_DEVICE AMREX_FORCE_INLINE
void mlndlap_gscolor_aa (int i, int j, int k, Array4<Real> const& sol,
Array4<Real const> const& rhs,
Array4<Real const> const& sx,
Array4<int const> const& msk,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv, int color) noexcept
{
mlndlap_gscolor_ha(i,j,k,sol,rhs,sx,msk,dxinv,color);
}

AMREX_GPU_DEVICE AMREX_FORCE_INLINE
void mlndlap_gscolor_c (int i, int j, int k, Array4<Real> const& sol,
Array4<Real const> const& rhs, Real sig,
Array4<int const> const& msk,
GpuArray<Real,AMREX_SPACEDIM> const& dxinv, int color) noexcept
{
if (mlndlap_color(i,j,k) == color) {
if (msk(i,0,0)) {
sol(i,0,0) = Real(0.0);
} else {
Real fac = dxinv[0]*dxinv[0];

Real s0 = Real(-2.0) * fac * sig;
Real Ax = sol(i-1,0,0)*fac*sig
+ sol(i+1,0,0)*fac*sig
+ sol(i ,0,0)*s0;
sol(i,0,0) += (rhs(i,0,0) - Ax) / s0;
}
}
}

AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
void mlndlap_gscolor_sten (int, int, int, Array4<Real> const&,
Array4<Real const> const&,
Array4<Real const> const&,
Array4<int const> const&, int) noexcept
{}

}

#endif
Loading
Loading