Add Jacobi smoother to ABecLaplacian and Poisson

It converges slower but can help keep preexisting symmetry.
AMReX-Codes · Aug 18, 2024 · 9d81563 · 9d81563
1 parent 8222a13
commit 9d81563
Show file tree

Hide file tree

Showing 11 changed files with 962 additions and 253 deletions.
diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_1D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_1D_K.H
@@ -157,6 +157,69 @@ void abec_gsrb_os (int i, int, int, int n, Array4<T> const& phi, Array4<T const>
 
 template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void abec_jacobi (int i, int, int, int n, Array4<T> const& phi,
+                  Array4<T const> const& rhs, Array4<T const> const& Ax,
+                  T alpha, Array4<T const> const& a,
+                  T dhx,
+                  Array4<T const> const& bX,
+                  Array4<int const> const& m0,
+                  Array4<int const> const& m1,
+                  Array4<T const> const& f0,
+                  Array4<T const> const& f1,
+                  Box const& vbox) noexcept
+{
+    const auto vlo = amrex::lbound(vbox);
+    const auto vhi = amrex::ubound(vbox);
+
+    T cf0 = (i == vlo.x && m0(vlo.x-1,0,0) > 0)
+        ? f0(vlo.x,0,0,n) : T(0.0);
+    T cf1 = (i == vhi.x && m1(vhi.x+1,0,0) > 0)
+        ? f1(vhi.x,0,0,n) : T(0.0);
+
+    T delta = dhx*(bX(i,0,0,n)*cf0 + bX(i+1,0,0,n)*cf1);
+
+    T gamma = alpha*a(i,0,0)
+        +   dhx*( bX(i,0,0,n) + bX(i+1,0,0,n) );
+
+    phi(i,0,0,n) += T(2.0/3.0) * (rhs(i,0,0,n) - Ax(i,0,0,n)) / (gamma - delta);
+}
+
+template <typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void abec_jacobi_os (int i, int, int, int n, Array4<T> const& phi,
+                     Array4<T const> const& rhs, Array4<T const> const& Ax,
+                     T alpha, Array4<T const> const& a,
+                     T dhx,
+                     Array4<T const> const& bX,
+                     Array4<int const> const& m0,
+                     Array4<int const> const& m1,
+                     Array4<T const> const& f0,
+                     Array4<T const> const& f1,
+                     Array4<int const> const& osm,
+                     Box const& vbox) noexcept
+{
+    if (osm(i,0,0) == 0) {
+        phi(i,0,0) = T(0.0);
+    } else {
+        const auto vlo = amrex::lbound(vbox);
+        const auto vhi = amrex::ubound(vbox);
+
+        T cf0 = (i == vlo.x && m0(vlo.x-1,0,0) > 0)
+            ? f0(vlo.x,0,0,n) : T(0.0);
+        T cf1 = (i == vhi.x && m1(vhi.x+1,0,0) > 0)
+            ? f1(vhi.x,0,0,n) : T(0.0);
+
+        T delta = dhx*(bX(i,0,0,n)*cf0 + bX(i+1,0,0,n)*cf1);
+
+        T gamma = alpha*a(i,0,0)
+            +   dhx*( bX(i,0,0,n) + bX(i+1,0,0,n) );
+
+        phi(i,0,0,n) += T(2.0/3.0) * (rhs(i,0,0,n) - Ax(i,0,0,n)) / (gamma - delta);
+    }
+}
+
+template <typename T>
+AMREX_FORCE_INLINE
 void abec_gsrb_with_line_solve (
                 Box const& /*box*/, Array4<T> const& /*phi*/, Array4<T const> const& /*rhs*/,
                 T /*alpha*/, Array4<T const> const& /*a*/,

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_2D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_2D_K.H
@@ -230,6 +230,81 @@ void abec_gsrb_os (int i, int j, int, int n, Array4<T> const& phi, Array4<T cons
 
 template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void abec_jacobi (int i, int j, int, int n, Array4<T> const& phi,
+                  Array4<T const> const& rhs, Array4< T const> const& Ax,
+                  T alpha, Array4<T const> const& a,
+                  T dhx, T dhy,
+                  Array4<T const> const& bX, Array4<T const> const& bY,
+                  Array4<int const> const& m0, Array4<int const> const& m2,
+                  Array4<int const> const& m1, Array4<int const> const& m3,
+                  Array4<T const> const& f0, Array4<T const> const& f2,
+                  Array4<T const> const& f1, Array4<T const> const& f3,
+                  Box const& vbox) noexcept
+{
+    const auto vlo = amrex::lbound(vbox);
+    const auto vhi = amrex::ubound(vbox);
+
+    T cf0 = (i == vlo.x && m0(vlo.x-1,j,0) > 0)
+        ? f0(vlo.x,j,0,n) : T(0.0);
+    T cf1 = (j == vlo.y && m1(i,vlo.y-1,0) > 0)
+        ? f1(i,vlo.y,0,n) : T(0.0);
+    T cf2 = (i == vhi.x && m2(vhi.x+1,j,0) > 0)
+        ? f2(vhi.x,j,0,n) : T(0.0);
+    T cf3 = (j == vhi.y && m3(i,vhi.y+1,0) > 0)
+        ? f3(i,vhi.y,0,n) : T(0.0);
+
+    T delta = dhx*(bX(i,j,0,n)*cf0 + bX(i+1,j,0,n)*cf2)
+           +  dhy*(bY(i,j,0,n)*cf1 + bY(i,j+1,0,n)*cf3);
+
+    T gamma = alpha*a(i,j,0)
+        +   dhx*( bX(i,j,0,n) + bX(i+1,j,0,n) )
+        +   dhy*( bY(i,j,0,n) + bY(i,j+1,0,n) );
+
+    phi(i,j,0,n) += T(2.0/3.0) * (rhs(i,j,0,n) - Ax(i,j,0,n)) / (gamma - delta);
+}
+
+template <typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void abec_jacobi_os (int i, int j, int, int n, Array4<T> const& phi,
+                     Array4<T const> const& rhs, Array4<T const> const& Ax,
+                     T alpha, Array4<T const> const& a,
+                     T dhx, T dhy,
+                     Array4<T const> const& bX, Array4<T const> const& bY,
+                     Array4<int const> const& m0, Array4<int const> const& m2,
+                     Array4<int const> const& m1, Array4<int const> const& m3,
+                     Array4<T const> const& f0, Array4<T const> const& f2,
+                     Array4<T const> const& f1, Array4<T const> const& f3,
+                     Array4<int const> const& osm,
+                     Box const& vbox) noexcept
+{
+    if (osm(i,j,0) == 0) {
+        phi(i,j,0,n) = T(0.0);
+    } else {
+        const auto vlo = amrex::lbound(vbox);
+        const auto vhi = amrex::ubound(vbox);
+
+        T cf0 = (i == vlo.x && m0(vlo.x-1,j,0) > 0)
+            ? f0(vlo.x,j,0,n) : T(0.0);
+        T cf1 = (j == vlo.y && m1(i,vlo.y-1,0) > 0)
+            ? f1(i,vlo.y,0,n) : T(0.0);
+        T cf2 = (i == vhi.x && m2(vhi.x+1,j,0) > 0)
+            ? f2(vhi.x,j,0,n) : T(0.0);
+        T cf3 = (j == vhi.y && m3(i,vhi.y+1,0) > 0)
+            ? f3(i,vhi.y,0,n) : T(0.0);
+
+        T delta = dhx*(bX(i,j,0,n)*cf0 + bX(i+1,j,0,n)*cf2)
+               +  dhy*(bY(i,j,0,n)*cf1 + bY(i,j+1,0,n)*cf3);
+
+        T gamma = alpha*a(i,j,0)
+            +   dhx*( bX(i,j,0,n) + bX(i+1,j,0,n) )
+            +   dhy*( bY(i,j,0,n) + bY(i,j+1,0,n) );
+
+        phi(i,j,0,n) += T(2.0/3.0) * (rhs(i,j,0,n) - Ax(i,j,0,n)) / (gamma - delta);
+    }
+}
+
+template <typename T>
+AMREX_FORCE_INLINE
 void abec_gsrb_with_line_solve (
                 Box const& box, Array4<T> const& phi, Array4<T const> const& rhs,
                 T alpha, Array4<T const> const& a,

diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLap_3D_K.H
@@ -329,6 +329,106 @@ void abec_gsrb_os (int i, int j, int k, int n,
 
 template <typename T>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void abec_jacobi (int i, int j, int k, int n, Array4<T> const& phi,
+                  Array4<T const> const& rhs, Array4<T const> const& Ax,
+                  T alpha, Array4<T const> const& a,
+                  T dhx, T dhy, T dhz,
+                  Array4<T const> const& bX, Array4<T const> const& bY,
+                  Array4<T const> const& bZ,
+                  Array4<int const> const& m0, Array4<int const> const& m2,
+                  Array4<int const> const& m4,
+                  Array4<int const> const& m1, Array4<int const> const& m3,
+                  Array4<int const> const& m5,
+                  Array4<T const> const& f0, Array4<T const> const& f2,
+                  Array4<T const> const& f4,
+                  Array4<T const> const& f1, Array4<T const> const& f3,
+                  Array4<T const> const& f5,
+                  Box const& vbox) noexcept
+{
+    const auto vlo = amrex::lbound(vbox);
+    const auto vhi = amrex::ubound(vbox);
+
+    T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
+        ? f0(vlo.x,j,k,n) : T(0.0);
+    T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
+        ? f1(i,vlo.y,k,n) : T(0.0);
+    T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
+        ? f2(i,j,vlo.z,n) : T(0.0);
+    T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
+        ? f3(vhi.x,j,k,n) : T(0.0);
+    T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
+        ? f4(i,vhi.y,k,n) : T(0.0);
+    T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
+        ? f5(i,j,vhi.z,n) : T(0.0);
+
+    T gamma = alpha*a(i,j,k)
+        +   dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
+        +   dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
+        +   dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
+
+    T g_m_d = gamma
+        - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
+        +  dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
+        +  dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
+
+    phi(i,j,k,n) += T(2.0/3.0) * (rhs(i,j,k,n) - Ax(i,j,k,n)) / g_m_d;
+}
+
+template <typename T>
+AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+void abec_jacobi_os (int i, int j, int k, int n,
+                     Array4<T> const& phi, Array4<T const> const& rhs,
+                     Array4<T const> const& Ax,
+                     T alpha, Array4<T const> const& a,
+                     T dhx, T dhy, T dhz,
+                     Array4<T const> const& bX, Array4<T const> const& bY,
+                     Array4<T const> const& bZ,
+                     Array4<int const> const& m0, Array4<int const> const& m2,
+                     Array4<int const> const& m4,
+                     Array4<int const> const& m1, Array4<int const> const& m3,
+                     Array4<int const> const& m5,
+                     Array4<T const> const& f0, Array4<T const> const& f2,
+                     Array4<T const> const& f4,
+                     Array4<T const> const& f1, Array4<T const> const& f3,
+                     Array4<T const> const& f5,
+                     Array4<int const> const& osm,
+                     Box const& vbox) noexcept
+{
+    if (osm(i,j,k) == 0) {
+        phi(i,j,k,n) = T(0.0);
+    } else {
+        const auto vlo = amrex::lbound(vbox);
+        const auto vhi = amrex::ubound(vbox);
+
+        T cf0 = (i == vlo.x && m0(vlo.x-1,j,k) > 0)
+            ? f0(vlo.x,j,k,n) : T(0.0);
+        T cf1 = (j == vlo.y && m1(i,vlo.y-1,k) > 0)
+            ? f1(i,vlo.y,k,n) : T(0.0);
+        T cf2 = (k == vlo.z && m2(i,j,vlo.z-1) > 0)
+            ? f2(i,j,vlo.z,n) : T(0.0);
+        T cf3 = (i == vhi.x && m3(vhi.x+1,j,k) > 0)
+            ? f3(vhi.x,j,k,n) : T(0.0);
+        T cf4 = (j == vhi.y && m4(i,vhi.y+1,k) > 0)
+            ? f4(i,vhi.y,k,n) : T(0.0);
+        T cf5 = (k == vhi.z && m5(i,j,vhi.z+1) > 0)
+            ? f5(i,j,vhi.z,n) : T(0.0);
+
+        T gamma = alpha*a(i,j,k)
+            +   dhx*(bX(i,j,k,n)+bX(i+1,j,k,n))
+            +   dhy*(bY(i,j,k,n)+bY(i,j+1,k,n))
+            +   dhz*(bZ(i,j,k,n)+bZ(i,j,k+1,n));
+
+        T g_m_d = gamma
+            - (dhx*(bX(i,j,k,n)*cf0 + bX(i+1,j,k,n)*cf3)
+            +  dhy*(bY(i,j,k,n)*cf1 + bY(i,j+1,k,n)*cf4)
+            +  dhz*(bZ(i,j,k,n)*cf2 + bZ(i,j,k+1,n)*cf5));
+
+        phi(i,j,k,n) += T(2.0/3.0) * (rhs(i,j,k,n) - Ax(i,j,k,n)) / g_m_d;
+    }
+}
+
+template <typename T>
+AMREX_FORCE_INLINE
 void tridiagonal_solve (Array1D<T,0,31>& a_ls, Array1D<T,0,31>& b_ls, Array1D<T,0,31>& c_ls,
                         Array1D<T,0,31>& r_ls, Array1D<T,0,31>& u_ls, Array1D<T,0,31>& gam,
                         int ilen ) noexcept
@@ -348,7 +448,7 @@ void tridiagonal_solve (Array1D<T,0,31>& a_ls, Array1D<T,0,31>& b_ls, Array1D<T,
 }
 
 template <typename T>
-AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+AMREX_FORCE_INLINE
 void abec_gsrb_with_line_solve (
                 Box const& box, Array4<T> const& phi, Array4<T const> const& rhs,
                 T alpha, Array4<T const> const& a,