From 2b4eb94e59dc5cb456c43aca2e436d28f1dd292d Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Fri, 6 Sep 2024 06:17:38 -0700 Subject: [PATCH 1/9] STL optimization: Bounding volume hierarchy Speed up EB geometry generation with STL by using the bounding volume hierarchy (BVH) method. The BVH tree is stored in a contiguous chunk of memory making it easier for GPUs. Using a fixed size stack, recursion is avoided when traversing the tree. --- Src/EB/AMReX_EB2.cpp | 5 +- Src/EB/AMReX_EB2_IndexSpace_STL.H | 2 +- Src/EB/AMReX_EB2_IndexSpace_STL.cpp | 4 +- Src/EB/AMReX_EB_STL_utils.H | 86 +++-- Src/EB/AMReX_EB_STL_utils.cpp | 562 +++++++++++++++++++++++----- Src/EB/AMReX_EB_triGeomOps_K.H | 24 +- 6 files changed, 543 insertions(+), 140 deletions(-) diff --git a/Src/EB/AMReX_EB2.cpp b/Src/EB/AMReX_EB2.cpp index f99eb504d2f..3684e87b473 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -216,6 +216,8 @@ Build (const Geometry& geom, int required_coarsening_level, pp.queryAdd("stl_center", stl_center); bool stl_reverse_normal = false; pp.queryAdd("stl_reverse_normal", stl_reverse_normal); + bool stl_bvh_opt = true; + pp.queryAdd("stl_bvh_opt", stl_bvh_opt); IndexSpace::push(new IndexSpaceSTL(stl_file, stl_scale, // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) {stl_center[0], stl_center[1], stl_center[2]}, int(stl_reverse_normal), @@ -223,7 +225,8 @@ Build (const Geometry& geom, int required_coarsening_level, max_coarsening_level, ngrow, build_coarse_level_by_coarsening, a_extend_domain_face, - a_num_coarsen_opt)); + a_num_coarsen_opt, + stl_bvh_opt)); } else { diff --git a/Src/EB/AMReX_EB2_IndexSpace_STL.H b/Src/EB/AMReX_EB2_IndexSpace_STL.H index 0c72d076ea2..f974daba7a2 100644 --- a/Src/EB/AMReX_EB2_IndexSpace_STL.H +++ b/Src/EB/AMReX_EB2_IndexSpace_STL.H @@ -19,7 +19,7 @@ public: const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face, int num_coarsen_opt); + bool extend_domain_face, int num_coarsen_opt, bool bvh_optimization); IndexSpaceSTL (IndexSpaceSTL const&) = delete; IndexSpaceSTL (IndexSpaceSTL &&) = delete; diff --git a/Src/EB/AMReX_EB2_IndexSpace_STL.cpp b/Src/EB/AMReX_EB2_IndexSpace_STL.cpp index 70e3b492d82..f8f62684f2f 100644 --- a/Src/EB/AMReX_EB2_IndexSpace_STL.cpp +++ b/Src/EB/AMReX_EB2_IndexSpace_STL.cpp @@ -7,11 +7,13 @@ IndexSpaceSTL::IndexSpaceSTL (const std::string& stl_file, Real stl_scale, const Geometry& geom, int required_coarsening_level, int max_coarsening_level, int ngrow, bool build_coarse_level_by_coarsening, - bool extend_domain_face, int num_coarsen_opt) + bool extend_domain_face, int num_coarsen_opt, + bool bvh_optimization) { Gpu::LaunchSafeGuard lsg(true); // Always use GPU STLtools stl_tools; + stl_tools.setBVHOptimization(bvh_optimization); stl_tools.read_stl_file(stl_file, stl_scale, stl_center, stl_reverse_normal); // build finest level (i.e., level 0) first diff --git a/Src/EB/AMReX_EB_STL_utils.H b/Src/EB/AMReX_EB_STL_utils.H index eb277202cde..9cd4b3e84d3 100644 --- a/Src/EB/AMReX_EB_STL_utils.H +++ b/Src/EB/AMReX_EB_STL_utils.H @@ -7,6 +7,10 @@ #include #include +#include +#include +#include + namespace amrex { @@ -15,33 +19,48 @@ class STLtools public: struct Triangle { XDim3 v1, v2, v3; - }; - - static constexpr int allregular = -1; - static constexpr int mixedcells = 0; - static constexpr int allcovered = 1; - -private: - Gpu::PinnedVector m_tri_pts_h; - Gpu::DeviceVector m_tri_pts_d; - Gpu::DeviceVector m_tri_normals_d; + Real cent (int d) const + { + static_assert(sizeof(XDim3) == sizeof(Real)*3); + return Real(1./3.)*((&v1.x)[d] + (&v2.x)[d] + (&v3.x)[d]); + } + + std::pair minmax (int d) const + { + static_assert(sizeof(XDim3) == sizeof(Real)*3); + return std::minmax({(&v1.x)[d], (&v2.x)[d], (&v3.x)[d]}); + } + }; - int m_num_tri=0; + template + struct BVHNodeT + { + RealBox boundingbox{AMREX_D_DECL(std::numeric_limits::max(), + std::numeric_limits::max(), + std::numeric_limits::max()), + AMREX_D_DECL(std::numeric_limits::lowest(), + std::numeric_limits::lowest(), + std::numeric_limits::lowest())}; + STLtools::Triangle triangles[M]; + XDim3 trinorm[M]; + int children[N]; + std::int8_t ntriangles = 0; + std::int8_t nchildren = 0; + }; - XDim3 m_ptmin; // All triangles are inside the bounding box defined by - XDim3 m_ptmax; // m_ptmin and m_ptmax. - XDim3 m_ptref; // The reference point is slightly outside the bounding box. - bool m_boundry_is_outside; // Is the bounding box boundary outside or inside the object? + static constexpr int m_bvh_max_size = 4; // max # of triangles in a leaf node + static constexpr int m_bvh_max_splits = 4; // max # of children + static constexpr int m_bvh_max_stack_size = 12; // max depth of the tree - void read_ascii_stl_file (std::string const& fname, Real scale, - Array const& center, int reverse_normal); - void read_binary_stl_file (std::string const& fname, Real scale, - Array const& center, int reverse_normal); + using Node = BVHNodeT; -public: + static constexpr int allregular = -1; + static constexpr int mixedcells = 0; + static constexpr int allcovered = 1; - void prepare (); // public for cuda + // xxxxx yes, no, auto + void setBVHOptimization (bool flag) { m_bvh_optimization = flag; } void read_stl_file (std::string const& fname, Real scale, Array const& center, int reverse_normal); @@ -65,6 +84,31 @@ public: Array,AMREX_SPACEDIM> const& type_arr, Array4 const& lst, Geometry const& geom) ; + void prepare (Gpu::PinnedVector a_tri_pts); // public for cuda + +private: + + bool m_bvh_optimization = true; + + Gpu::DeviceVector m_tri_pts_d; + Gpu::DeviceVector m_tri_normals_d; + Gpu::DeviceVector m_bvh_nodes; + + int m_num_tri=0; + + XDim3 m_ptmin; // All triangles are inside the bounding box defined by + XDim3 m_ptmax; // m_ptmin and m_ptmax. + XDim3 m_ptref; // The reference point is slightly outside the bounding box. + bool m_boundry_is_outside; // Is the bounding box boundary outside or inside the object? + + void read_ascii_stl_file (std::string const& fname, Real scale, + Array const& center, int reverse_normal, + Gpu::PinnedVector& a_tri_pts); + void read_binary_stl_file (std::string const& fname, Real scale, + Array const& center, int reverse_normal, + Gpu::PinnedVector& a_tri_pts); + + static void build_bvh (Triangle* begin, Triangle * end, Gpu::PinnedVector& bvh_nodes); }; } diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index f7ce3045d9b..7b61e35556b 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -1,12 +1,30 @@ +#include #include #include #include +#include +#include + #include +#include namespace amrex { namespace { + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + XDim3 triangle_norm (STLtools::Triangle const& tri) + { + XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z}; + XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z}; + XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y, + vec1.z*vec2.x-vec1.x*vec2.z, + vec1.x*vec2.y-vec1.y*vec2.x}; + Real tmp = 1._rt / std::sqrt(norm.x*norm.x + norm.y*norm.y + norm.z*norm.z); + return {norm.x * tmp, norm.y * tmp, norm.z * tmp}; + } + // Does line ab intersect with the triangle? AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool line_tri_intersects (Real a[3], Real b[3], STLtools::Triangle const& tri) @@ -89,12 +107,95 @@ namespace { return std::make_pair(false,0.0_rt); } } + + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + bool line_box_intersects (Real const a[3], Real const b[3], RealBox const& box) + { + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + if ((a[idim] < box.lo(idim) && b[idim] < box.lo(idim)) || + (a[idim] > box.hi(idim) && b[idim] > box.hi(idim))) { + return false; + } + } + if (box.contains(a) || box.contains(b)) { + return true; + } + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + // Note that we have made bounding box slightly bigger. So it's + // safe to assume that a line in the plane does not intersect + // with the actual bounding box. + if (a[idim] == b[idim]) { continue; } + Real xi[] = {box.lo(idim), box.hi(idim)}; + for (int face = 0; face < 2; ++face) { + if (!((a[idim] > xi[face] && b[idim] > xi[face]) || + (a[idim] < xi[face] && b[idim] < xi[face]))) + { + Real w = (xi[face]-a[idim]) / (b[idim]-a[idim]); + bool inside = true; + for (int jdim = 0; jdim < AMREX_SPACEDIM; ++jdim) { + if (idim != jdim) { + Real xpt = a[jdim] + (b[jdim]-a[jdim]) * w; + inside = inside && (xpt >= box.lo(jdim) + && xpt <= box.hi(jdim)); + } + } + if (inside) { return true; } + } + } + } + + return false; + } + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + void bvh_line_tri_intersects (Real a[3], Real b[3], + STLtools::BVHNodeT const* root, + F&& f) + { + // Use stack to avoid recursion + Stack nodes_to_do; + Stack nchildren_done; + + if (line_box_intersects(a, b, root->boundingbox)) { + nodes_to_do.push(0); + nchildren_done.push(0); + } + + while (!nodes_to_do.empty()) { + auto const& node = root[nodes_to_do.top()]; + if (node.nchildren == 0) { // leaf node + int ret = f(node.ntriangles, node.triangles, node.trinorm); + if (ret != 0) { break; } + nodes_to_do.pop(); + nchildren_done.pop(); + } else { + auto& ndone = nchildren_done.top(); + if (ndone < node.nchildren) { + for (int ichild = ndone; ichild < node.nchildren; ++ichild) { + ++ndone; + int inode = node.children[ichild]; + if (line_box_intersects(a, b, root[inode].boundingbox)) { + nodes_to_do.push(inode); + nchildren_done.push(0); + break; + } + } + } else { + nodes_to_do.pop(); + nchildren_done.pop(); + } + } + } + } } void STLtools::read_stl_file (std::string const& fname, Real scale, Array const& center, int reverse_normal) { + Gpu::PinnedVector tri_pts; + if (ParallelDescriptor::IOProcessor()) { char header[6]; header[5] = '\0'; @@ -107,18 +208,19 @@ STLtools::read_stl_file (std::string const& fname, Real scale, Array con } int is_binary = std::strcmp(header, "solid"); if (is_binary) { - read_binary_stl_file(fname, scale, center, reverse_normal); + read_binary_stl_file(fname, scale, center, reverse_normal, tri_pts); } else { - read_ascii_stl_file(fname, scale, center, reverse_normal); + read_ascii_stl_file(fname, scale, center, reverse_normal, tri_pts); } } - prepare(); + prepare(std::move(tri_pts)); } void STLtools::read_binary_stl_file (std::string const& fname, Real scale, - Array const& center, int reverse_normal) + Array const& center, int reverse_normal, + Gpu::PinnedVector& a_tri_pts) { if (ParallelDescriptor::IOProcessor()) { if (amrex::Verbose()) { @@ -140,9 +242,13 @@ STLtools::read_binary_stl_file (std::string const& fname, Real scale, uint32_t numtris; // uint32 - Number of triangles - 4 bytes amrex::readIntData(&numtris, 1, is, uint32_descr); - AMREX_ASSERT(numtris < uint32_t(std::numeric_limits::max())); + AMREX_ALWAYS_ASSERT(numtris < uint32_t(std::numeric_limits::max())); m_num_tri = static_cast(numtris); - m_tri_pts_h.resize(m_num_tri); + // maximum number of triangles allowed for traversing the BVH tree + // using stack. + int max_tri_stack = Math::powi(m_bvh_max_splits)*m_bvh_max_size; + AMREX_ALWAYS_ASSERT(m_num_tri <= max_tri_stack); + a_tri_pts.resize(m_num_tri); if (amrex::Verbose()) { Print() << " Number of triangles: " << m_num_tri << "\n"; @@ -150,7 +256,7 @@ STLtools::read_binary_stl_file (std::string const& fname, Real scale, for (int i=0; i < m_num_tri; ++i) { is.read(tmp, 50); // 50 bytes for each triangle. Vertex 1 starts at 12 bytes. - Real* p = &(m_tri_pts_h[i].v1.x); + Real* p = &(a_tri_pts[i].v1.x); RealDescriptor::convertToNativeFormat(p, 9, tmp+12, real32_descr); for (int j = 0; j < 3; ++j) { p[0] = p[0] * scale + center[0]; @@ -159,7 +265,7 @@ STLtools::read_binary_stl_file (std::string const& fname, Real scale, p += 3; } if (reverse_normal) { - std::swap(m_tri_pts_h[i].v1, m_tri_pts_h[i].v2); + std::swap(a_tri_pts[i].v1, a_tri_pts[i].v2); } } } @@ -167,7 +273,8 @@ STLtools::read_binary_stl_file (std::string const& fname, Real scale, void STLtools::read_ascii_stl_file (std::string const& fname, Real scale, - Array const& center, int reverse_normal) + Array const& center, int reverse_normal, + Gpu::PinnedVector& a_tri_pts) { if (ParallelDescriptor::IOProcessor()) { if (amrex::Verbose()) { @@ -200,9 +307,9 @@ STLtools::read_ascii_stl_file (std::string const& fname, Real scale, } m_num_tri = nlines / nlines_per_facet; - m_tri_pts_h.resize(m_num_tri); + a_tri_pts.resize(m_num_tri); static_assert(sizeof(Triangle) == sizeof(Real)*9, "sizeof(Triangle) is wrong"); - Real* p = &(m_tri_pts_h[0].v1.x); + Real* p = &(a_tri_pts[0].v1.x); if (amrex::Verbose()) { Print() << " Number of triangles: " << m_num_tri << "\n"; @@ -230,45 +337,49 @@ STLtools::read_ascii_stl_file (std::string const& fname, Real scale, std::getline(is,tmp); //end facet if (reverse_normal) { - std::swap(m_tri_pts_h[i].v1, m_tri_pts_h[i].v2); + std::swap(a_tri_pts[i].v1, a_tri_pts[i].v2); } } } } void -STLtools::prepare () +STLtools::prepare (Gpu::PinnedVector a_tri_pts) { + BL_PROFILE("STLtools::prepare"); + ParallelDescriptor::Bcast(&m_num_tri, 1); if (!ParallelDescriptor::IOProcessor()) { - m_tri_pts_h.resize(m_num_tri); + a_tri_pts.resize(m_num_tri); } - ParallelDescriptor::Bcast((char*)(m_tri_pts_h.dataPtr()), m_num_tri*sizeof(Triangle)); + ParallelDescriptor::Bcast((char*)(a_tri_pts.dataPtr()), m_num_tri*sizeof(Triangle)); - //device vectors - m_tri_pts_d.resize(m_num_tri); - m_tri_normals_d.resize(m_num_tri); + Gpu::PinnedVector bvh_nodes; + if (m_bvh_optimization) { + BL_PROFILE("STLtools::build_bvh"); + build_bvh(a_tri_pts.data(), a_tri_pts.data()+a_tri_pts.size(), bvh_nodes); +#ifdef AMREX_USE_GPU + m_bvh_nodes.resize(bvh_nodes.size()); + Gpu::copyAsync(Gpu::hostToDevice, bvh_nodes.begin(), bvh_nodes.end(), + m_bvh_nodes.begin()); +#else + m_bvh_nodes = std::move(bvh_nodes); +#endif + } - Gpu::copyAsync(Gpu::hostToDevice, m_tri_pts_h.begin(), m_tri_pts_h.end(), - m_tri_pts_d.begin()); + auto const tri0 = a_tri_pts[0]; +#ifdef AMREX_USE_GPU + m_tri_pts_d.resize(m_num_tri); + Gpu::copyAsync(Gpu::hostToDevice, a_tri_pts.begin(), a_tri_pts.end(), + m_tri_pts_d.begin()); +#else + m_tri_pts_d = std::move(a_tri_pts); +#endif Triangle const* tri_pts = m_tri_pts_d.data(); - XDim3* tri_norm = m_tri_normals_d.data(); - // Compute normals in case the STL file does not have valid data for normals - ParallelFor(m_num_tri, [=] AMREX_GPU_DEVICE (int i) noexcept - { - Triangle const& tri = tri_pts[i]; - XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z}; - XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z}; - XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y, - vec1.z*vec2.x-vec1.x*vec2.z, - vec1.x*vec2.y-vec1.y*vec2.x}; - Real tmp = 1._rt / std::sqrt(norm.x*norm.x + norm.y*norm.y + norm.z*norm.z); - tri_norm[i].x = norm.x * tmp; - tri_norm[i].y = norm.y * tmp; - tri_norm[i].z = norm.z * tmp; - }); + m_tri_normals_d.resize(m_num_tri); + XDim3* tri_norm = m_tri_normals_d.data(); ReduceOps reduce_op; ReduceData reduce_data(reduce_op); @@ -276,6 +387,7 @@ STLtools::prepare () reduce_op.eval(m_num_tri, reduce_data, [=] AMREX_GPU_DEVICE (int i) -> ReduceTuple { + tri_norm[i] = triangle_norm(tri_pts[i]); return {amrex::min(tri_pts[i].v1.x, tri_pts[i].v2.x, tri_pts[i].v3.x), @@ -309,24 +421,12 @@ STLtools::prepare () // Choose a reference point by extending the normal vector of the first // triangle until it's slightly outside the bounding box. - XDim3 cent0; // centroid of the first triangle + XDim3 cent0{tri0.cent(0), tri0.cent(1), tri0.cent(2)}; int is_ref_positive; { - Triangle const& tri = m_tri_pts_h[0]; - cent0 = XDim3{(tri.v1.x + tri.v2.x + tri.v3.x) / 3._rt, - (tri.v1.y + tri.v2.y + tri.v3.y) / 3._rt, - (tri.v1.z + tri.v2.z + tri.v3.z) / 3._rt}; // We are computing the normal ourselves in case the stl file does // not have valid data on normal. - XDim3 vec1{tri.v2.x-tri.v1.x, tri.v2.y-tri.v1.y, tri.v2.z-tri.v1.z}; - XDim3 vec2{tri.v3.x-tri.v2.x, tri.v3.y-tri.v2.y, tri.v3.z-tri.v2.z}; - XDim3 norm{vec1.y*vec2.z-vec1.z*vec2.y, - vec1.z*vec2.x-vec1.x*vec2.z, - vec1.x*vec2.y-vec1.y*vec2.x}; - Real tmp = 1._rt / std::sqrt(norm.x*norm.x + norm.y*norm.y + norm.z*norm.z); - norm.x *= tmp; - norm.y *= tmp; - norm.z *= tmp; + XDim3 norm = triangle_norm(tri0); // Now we need to find out where the normal vector will intersect // with the bounding box defined by m_ptmin and m_ptmax. Real Lx, Ly, Lz; @@ -415,10 +515,96 @@ STLtools::prepare () m_boundry_is_outside = num_isects % 2 == 0; } +void +STLtools::build_bvh (Triangle* begin, Triangle* end, Gpu::PinnedVector& bvh_nodes) +{ + auto ntri = int(end - begin); + + if (ntri <= m_bvh_max_size) { + // This is a leaf node + bvh_nodes.push_back(Node()); + auto& node = bvh_nodes.back(); + auto& bbox = node.boundingbox; + for (int tr = 0; tr < ntri; ++tr) { + auto const& tri = begin[tr]; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + auto const& [xmin,xmax] = tri.minmax(idim); + bbox.setLo(idim,amrex::min(xmin, bbox.lo(idim))); + bbox.setHi(idim,amrex::max(xmax, bbox.hi(idim))); + } + node.triangles[tr] = tri; + node.trinorm[tr] = triangle_norm(tri); + } +#ifdef AMREX_USE_FLOAT + constexpr Real eps = Real(1.e-5); +#else + constexpr Real eps = Real(1.e-10); +#endif + Real small = eps*std::max({AMREX_D_DECL(bbox.length(0), + bbox.length(1), + bbox.length(2))}); + // Make bounding box slightly bigger for robustness. + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + bbox.setLo(idim,bbox.lo(idim)-small); + bbox.setHi(idim,bbox.hi(idim)+small); + } + node.ntriangles = ntri; + return; + } + + RealVect centmin(std::numeric_limits::max()); + RealVect centmax(std::numeric_limits::lowest()); + for (auto* p = begin; p != end; ++p) { + RealVect cent(AMREX_D_DECL(p->cent(0), p->cent(1), p->cent(2))); + centmin.min(cent); + centmax.max(cent); + } + int max_dir = (centmax-centmin).maxDir(false); + std::sort(begin, end, [max_dir] (Triangle const& a, Triangle const& b) -> bool + { return a.cent(max_dir) < b.cent(max_dir); }); + + int nsplits = std::min((ntri + (m_bvh_max_size-1)) / m_bvh_max_size, m_bvh_max_splits); + int tsize = ntri / nsplits; + int nleft = ntri - tsize*nsplits; + + bvh_nodes.push_back(Node()); + bvh_nodes.back().nchildren = nsplits; + int this_node = bvh_nodes.size()-1; + + for (int isplit = 0; isplit < nsplits; ++isplit) { + int tbegin, tend; + if (isplit < nleft) { + tbegin = isplit * (tsize+1); + tend = tbegin + tsize + 1; + } else { + tbegin = isplit * tsize + nleft; + tend = tbegin + tsize; + } + bvh_nodes[this_node].children[isplit] = int(bvh_nodes.size()); + build_bvh(begin+tbegin, begin+tend, bvh_nodes); + } + + // update bounding box + auto& node = bvh_nodes[this_node]; + for (int ichild = 0; ichild < node.nchildren; ++ichild) { + int inode = node.children[ichild]; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + auto lo = node.boundingbox.lo(idim); + auto hi = node.boundingbox.hi(idim); + auto clo = bvh_nodes[inode].boundingbox.lo(idim); + auto chi = bvh_nodes[inode].boundingbox.hi(idim); + node.boundingbox.setLo(idim, std::min(lo,clo)); + node.boundingbox.setHi(idim, std::max(hi,chi)); + } + } +} + void STLtools::fill (MultiFab& mf, IntVect const& nghost, Geometry const& geom, Real outside_value, Real inside_value) const { + BL_PROFILE("STLtools::fill"); + int num_triangles = m_num_tri; const auto plo = geom.ProbLoArray(); @@ -432,8 +618,15 @@ STLtools::fill (MultiFab& mf, IntVect const& nghost, Geometry const& geom, Real other_value = m_boundry_is_outside ? inside_value : outside_value; auto const& ma = mf.arrays(); + auto const* bvh_root = m_bvh_nodes.data(); - ParallelFor(mf, nghost, [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept + enum bvh_opt_options : int { no_bvh, yes_bvh }; + int bvh_opt_runtime_option = m_bvh_optimization ? yes_bvh : no_bvh; + + AnyCTO(TypeList>{}, + {bvh_opt_runtime_option}, + [&] (auto cto_func) { ParallelFor(mf, nghost, cto_func); }, + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k, auto control) noexcept { Real coords[3]; coords[0]=plo[0]+static_cast(i)*dx[0]; @@ -449,20 +642,40 @@ STLtools::fill (MultiFab& mf, IntVect const& nghost, Geometry const& geom, coords[2] >= ptmin.z && coords[2] <= ptmax.z) { Real pr[]={ptref.x, ptref.y, ptref.z}; - for (int tr=0; tr < num_triangles; ++tr) { - if (line_tri_intersects(pr, coords, tri_pts[tr])) { - ++num_intersects; +#ifdef AMREX_USE_CUDA + amrex::ignore_unused(bvh_root, num_triangles, tri_pts); +#endif + if constexpr (control == yes_bvh) { + bvh_line_tri_intersects(pr, coords, bvh_root, + [&] (int ntri, Triangle const* tri, + XDim3 const*) -> int + { + for (int tr=0; tr < ntri; ++tr) { + if (line_tri_intersects(pr, coords, tri[tr])) { + ++num_intersects; + } + } + return 0; + }); + } else { + for (int tr=0; tr < num_triangles; ++tr) { + if (line_tri_intersects(pr, coords, tri_pts[tr])) { + ++num_intersects; + } } } } ma[box_no](i,j,k) = (num_intersects % 2 == 0) ? reference_value : other_value; }); + Gpu::streamSynchronize(); } int STLtools::getBoxType (Box const& box, Geometry const& geom, RunOn) const { + BL_PROFILE("STLtools::getBoxType"); + const auto plo = geom.ProbLoArray(); const auto dx = geom.CellSizeArray(); @@ -498,11 +711,19 @@ STLtools::getBoxType (Box const& box, Geometry const& geom, RunOn) const XDim3 ptref = m_ptref; int ref_value = m_boundry_is_outside ? 1 : 0; + auto const* bvh_root = m_bvh_nodes.data(); + ReduceOps reduce_op; ReduceData reduce_data(reduce_op); using ReduceTuple = typename decltype(reduce_data)::Type; - reduce_op.eval(box, reduce_data, - [=] AMREX_GPU_DEVICE (int i, int j, int k) -> ReduceTuple + + enum bvh_opt_options : int { no_bvh, yes_bvh }; + int bvh_opt_runtime_option = m_bvh_optimization ? yes_bvh : no_bvh; + + AnyCTO(TypeList>{}, + {bvh_opt_runtime_option}, + [&] (auto cto_func) { reduce_op.eval(box, reduce_data, cto_func); }, + [=] AMREX_GPU_DEVICE (int i, int j, int k, auto control) -> ReduceTuple { Real coords[3]; coords[0]=plo[0]+static_cast(i)*dx[0]; @@ -519,15 +740,33 @@ STLtools::getBoxType (Box const& box, Geometry const& geom, RunOn) const coords[2] >= ptmin.z && coords[2] <= ptmax.z) { Real pr[]={ptref.x, ptref.y, ptref.z}; - for (int tr=0; tr < num_triangles; ++tr) { - if (line_tri_intersects(pr, coords, tri_pts[tr])) { - ++num_intersects; +#ifdef AMREX_USE_CUDA + amrex::ignore_unused(bvh_root,num_triangles,tri_pts); +#endif + if constexpr (control == yes_bvh) { + bvh_line_tri_intersects(pr, coords, bvh_root, + [&] (int ntri, Triangle const* tri, + XDim3 const*) -> int + { + for (int tr=0; tr < ntri; ++tr) { + if (line_tri_intersects(pr, coords, tri[tr])) { + ++num_intersects; + } + } + return 0; + }); + } else { + for (int tr=0; tr < num_triangles; ++tr) { + if (line_tri_intersects(pr, coords, tri_pts[tr])) { + ++num_intersects; + } } } } return (num_intersects % 2 == 0) ? ref_value : 1-ref_value; }); + ReduceTuple hv = reduce_data.value(reduce_op); Long nfluid = static_cast(amrex::get<0>(hv)); Long npts = box.numPts(); @@ -556,9 +795,17 @@ STLtools::fillFab (BaseFab& levelset, const Geometry& geom, RunOn, Box con Real reference_value = m_boundry_is_outside ? -1.0_rt : 1.0_rt; Real other_value = m_boundry_is_outside ? 1.0_rt : -1.0_rt; + auto const* bvh_root = m_bvh_nodes.data(); + auto const& a = levelset.array(); const Box& bx = levelset.box(); - ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + + enum bvh_opt_options : int { no_bvh, yes_bvh }; + int bvh_opt_runtime_option = m_bvh_optimization ? yes_bvh : no_bvh; + + ParallelFor(TypeList>{}, + {bvh_opt_runtime_option}, + bx, [=] AMREX_GPU_DEVICE (int i, int j, int k, auto control) noexcept { Real coords[3]; coords[0]=plo[0]+static_cast(i)*dx[0]; @@ -574,9 +821,26 @@ STLtools::fillFab (BaseFab& levelset, const Geometry& geom, RunOn, Box con coords[2] >= ptmin.z && coords[2] <= ptmax.z) { Real pr[]={ptref.x, ptref.y, ptref.z}; - for (int tr=0; tr < num_triangles; ++tr) { - if (line_tri_intersects(pr, coords, tri_pts[tr])) { - ++num_intersects; +#ifdef AMREX_USE_CUDA + amrex::ignore_unused(bvh_root,num_triangles,tri_pts); +#endif + if constexpr (control == yes_bvh) { + bvh_line_tri_intersects(pr, coords, bvh_root, + [&] (int ntri, Triangle const* tri, + XDim3 const*) -> int + { + for (int tr=0; tr < ntri; ++tr) { + if (line_tri_intersects(pr, coords, tri[tr])) { + ++num_intersects; + } + } + return 0; + }); + } else { + for (int tr=0; tr < num_triangles; ++tr) { + if (line_tri_intersects(pr, coords, tri_pts[tr])) { + ++num_intersects; + } } } } @@ -597,13 +861,22 @@ STLtools::getIntercept (Array,AMREX_SPACEDIM> const& inter_arr, const Triangle* tri_pts = m_tri_pts_d.data(); const XDim3* tri_norm = m_tri_normals_d.data(); + const Node* bvh_root = m_bvh_nodes.data(); + + enum bvh_opt_options : int { no_bvh, yes_bvh }; + int bvh_opt_runtime_option = m_bvh_optimization ? yes_bvh : no_bvh; for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { Array4 const& inter = inter_arr[idim]; Array4 const& type = type_arr[idim]; const Box bx{inter}; - ParallelFor(bx, [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + ParallelFor(TypeList>{}, + {bvh_opt_runtime_option}, + bx, [=] AMREX_GPU_DEVICE (int i, int j, int k, auto bvh_control) noexcept { +#ifdef AMREX_USE_CUDA + amrex::ignore_unused(num_triangles,tri_pts,tri_norm,lst,bvh_root); +#endif Real r = std::numeric_limits::quiet_NaN(); if (type(i,j,k) == EB2::Type::irregular) { XDim3 p1{plo[0]+static_cast(i)*dx[0], @@ -616,62 +889,143 @@ STLtools::getIntercept (Array,AMREX_SPACEDIM> const& inter_arr, }; if (idim == 0) { Real x2 = plo[0]+static_cast(i+1)*dx[0]; - int it; - for (it=0; it < num_triangles; ++it) { - auto const& tri = tri_pts[it]; - auto tmp = edge_tri_intersects(p1.x, x2, p1.y, p1.z, - tri.v1, tri.v2, tri.v3, - tri_norm[it], - lst(i+1,j,k)-lst(i,j,k)); - if (tmp.first) { - r = tmp.second; - break; + bool found = false; + if constexpr (bvh_control == no_bvh) { + for (int it=0; it < num_triangles; ++it) { + auto const& tri = tri_pts[it]; + auto tmp = edge_tri_intersects(p1.x, x2, p1.y, p1.z, + tri.v1, tri.v2, tri.v3, + tri_norm[it], + lst(i+1,j,k)-lst(i,j,k)); + if (tmp.first) { + r = tmp.second; + found = true; + break; + } } + } else { + Real a[3] = {p1.x , p1.y, p1.z}; + Real b[3] = { x2, p1.y, p1.z}; + bvh_line_tri_intersects(a, b, bvh_root, + [&] (int ntri, Triangle const* ptri, + XDim3 const* ptrinorm) -> int + { + for (int it=0; it < ntri; ++it) { + auto const& tri = ptri[it]; + auto tmp = edge_tri_intersects(p1.x, x2, p1.y, p1.z, + tri.v1, tri.v2, tri.v3, + ptrinorm[it], + lst(i+1,j,k)-lst(i,j,k)); + if (tmp.first) { + r = tmp.second; + found = true; + return 1; + } + } + return 0; + }); } - if (it == num_triangles) { + if (!found) { r = (lst(i,j,k) > 0._rt) ? p1.x : x2; } } else if (idim == 1) { Real y2 = plo[1]+static_cast(j+1)*dx[1]; - int it; - for (it=0; it < num_triangles; ++it) { - auto const& tri = tri_pts[it]; - auto const& norm = tri_norm[it]; - auto tmp = edge_tri_intersects(p1.y, y2, p1.z, p1.x, - {tri.v1.y, tri.v1.z, tri.v1.x}, - {tri.v2.y, tri.v2.z, tri.v2.x}, - {tri.v3.y, tri.v3.z, tri.v3.x}, - { norm.y, norm.z, norm.x}, - lst(i,j+1,k)-lst(i,j,k)); - if (tmp.first) { - r = tmp.second; - break; + bool found = false; + if constexpr (bvh_control == no_bvh) { + for (int it=0; it < num_triangles; ++it) { + auto const& tri = tri_pts[it]; + auto const& norm = tri_norm[it]; + auto tmp = edge_tri_intersects(p1.y, y2, p1.z, p1.x, + {tri.v1.y, tri.v1.z, tri.v1.x}, + {tri.v2.y, tri.v2.z, tri.v2.x}, + {tri.v3.y, tri.v3.z, tri.v3.x}, + { norm.y, norm.z, norm.x}, + lst(i,j+1,k)-lst(i,j,k)); + if (tmp.first) { + r = tmp.second; + found = true; + break; + } } + } else { + Real a[3] = {p1.x, p1.y , p1.z}; + Real b[3] = {p1.x, y2, p1.z}; + bvh_line_tri_intersects(a, b, bvh_root, + [&] (int ntri, Triangle const* ptri, + XDim3 const* ptrinorm) -> int + { + for (int it=0; it < ntri; ++it) { + auto const& tri = ptri[it]; + auto const& norm = ptrinorm[it]; + auto tmp = edge_tri_intersects(p1.y, y2, p1.z, p1.x, + {tri.v1.y, tri.v1.z, tri.v1.x}, + {tri.v2.y, tri.v2.z, tri.v2.x}, + {tri.v3.y, tri.v3.z, tri.v3.x}, + { norm.y, norm.z, norm.x}, + lst(i,j+1,k)-lst(i,j,k)); + if (tmp.first) { + r = tmp.second; + found = true; + return 1; + } + } + return 0; + }); } - if (it == num_triangles) { + if (!found) { r = (lst(i,j,k) > 0._rt) ? p1.y : y2; } - } else { + } +#if (AMREX_SPACEDIM == 3) + else { Real z2 = plo[2]+static_cast(k+1)*dx[2]; - int it; - for (it=0; it < num_triangles; ++it) { - auto const& tri = tri_pts[it]; - auto const& norm = tri_norm[it]; - auto tmp = edge_tri_intersects(p1.z, z2, p1.x, p1.y, - {tri.v1.z, tri.v1.x, tri.v1.y}, - {tri.v2.z, tri.v2.x, tri.v2.y}, - {tri.v3.z, tri.v3.x, tri.v3.y}, - { norm.z, norm.x, norm.y}, - lst(i,j,k+1)-lst(i,j,k)); - if (tmp.first) { - r = tmp.second; - break; + bool found = false; + if constexpr (bvh_control == no_bvh) { + for (int it=0; it < num_triangles; ++it) { + auto const& tri = tri_pts[it]; + auto const& norm = tri_norm[it]; + auto tmp = edge_tri_intersects(p1.z, z2, p1.x, p1.y, + {tri.v1.z, tri.v1.x, tri.v1.y}, + {tri.v2.z, tri.v2.x, tri.v2.y}, + {tri.v3.z, tri.v3.x, tri.v3.y}, + { norm.z, norm.x, norm.y}, + lst(i,j,k+1)-lst(i,j,k)); + if (tmp.first) { + r = tmp.second; + found = true; + break; + } } + } else { + Real a[3] = {p1.x, p1.y, p1.z }; + Real b[3] = {p1.x, p1.y, z2}; + bvh_line_tri_intersects(a, b, bvh_root, + [&] (int ntri, Triangle const* ptri, + XDim3 const* ptrinorm) -> int + { + for (int it=0; it < ntri; ++it) { + auto const& tri = ptri[it]; + auto const& norm = ptrinorm[it]; + auto tmp = edge_tri_intersects(p1.z, z2, p1.x, p1.y, + {tri.v1.z, tri.v1.x, tri.v1.y}, + {tri.v2.z, tri.v2.x, tri.v2.y}, + {tri.v3.z, tri.v3.x, tri.v3.y}, + { norm.z, norm.x, norm.y}, + lst(i,j,k+1)-lst(i,j,k)); + if (tmp.first) { + r = tmp.second; + found = true; + return 1; + } + } + return 0; + }); } - if (it == num_triangles) { + if (!found) { r = (lst(i,j,k) > 0._rt) ? p1.z : z2; } } +#endif } inter(i,j,k) = r; }); @@ -723,7 +1077,7 @@ STLtools::updateIntercept (Array,AMREX_SPACEDIM> const& inter_arr, (lst(i,j,k) > Real(0.0) && is_nan)) { inter(i,j,k) = problo[2] + static_cast(k)*dx[2]; - } + } else if (lst(i,j,k+1) == Real(0.0) || (lst(i,j,k+1) > Real(0.0) && is_nan)) { diff --git a/Src/EB/AMReX_EB_triGeomOps_K.H b/Src/EB/AMReX_EB_triGeomOps_K.H index 25a803892bf..b753f04f5dd 100644 --- a/Src/EB/AMReX_EB_triGeomOps_K.H +++ b/Src/EB/AMReX_EB_triGeomOps_K.H @@ -92,9 +92,9 @@ namespace amrex::tri_geom_ops CrossProd(v1,v2,n); - centr[0]=Real(0.333333)*(P1[0]+P2[0]+P3[0]); - centr[1]=Real(0.333333)*(P1[1]+P2[1]+P3[1]); - centr[2]=Real(0.333333)*(P1[2]+P2[2]+P3[2]); + centr[0]=Real(1./3.)*(P1[0]+P2[0]+P3[0]); + centr[1]=Real(1./3.)*(P1[1]+P2[1]+P3[1]); + centr[2]=Real(1./3.)*(P1[2]+P2[2]+P3[2]); getvec(centr,testp,c_tp_vec); magn=std::sqrt(n[0]*n[0]+n[1]*n[1]+n[2]*n[2]); @@ -166,13 +166,13 @@ namespace amrex::tri_geom_ops break; } - if(plane_eq_mid*plane_eq1 < 0.0) + if(plane_eq_mid*plane_eq1 < Real(0.0)) { p2[0]=midp[0]; p2[1]=midp[1]; p2[2]=midp[2]; } - else if(plane_eq_mid*plane_eq2 < 0.0) + else if(plane_eq_mid*plane_eq2 < Real(0.0)) { p1[0]=midp[0]; p1[1]=midp[1]; @@ -182,7 +182,7 @@ namespace amrex::tri_geom_ops //or error: p1,midp and p2 are on the same side //which is not what this function is meant for { - if(plane_eq_mid*plane_eq1 > 0.0 && plane_eq_mid*plane_eq2 > 0.0) + if(plane_eq_mid*plane_eq1 > Real(0.0) && plane_eq_mid*plane_eq2 > Real(0.0)) { all_ok=false; } @@ -233,11 +233,11 @@ namespace amrex::tri_geom_ops } } //proper and edge intersection - else if( (S1 < 0.0 && S2 < 0.0 && S3 < 0.0) || - (S1 > 0.0 && S2 > 0.0 && S3 > 0.0) || - (std::abs(S1) < eps && S2*S3 > 0.0) || //S1=0 - (std::abs(S2) < eps && S3*S1 > 0.0) || //S2=0 - (std::abs(S3) < eps && S1*S2 > 0.0) ) //S3=0 + else if( (S1 < Real(0.0) && S2 < Real(0.0) && S3 < Real(0.0)) || + (S1 > Real(0.0) && S2 > Real(0.0) && S3 > Real(0.0)) || + (std::abs(S1) < eps && S2*S3 > Real(0.0)) || //S1=0 + (std::abs(S2) < eps && S3*S1 > Real(0.0)) || //S2=0 + (std::abs(S3) < eps && S1*S2 > Real(0.0)) ) //S3=0 { get_plucker_coords(v1,t1,L2); @@ -253,7 +253,7 @@ namespace amrex::tri_geom_ops ls_s1 = side_op(L4,L3); ls_s2 = side_op(L4,L2); - if(ls_s1*ls_s2 > 0.0) + if(ls_s1*ls_s2 > Real(0.0)) { no_intersections = 0; } From 0e7816e6bc024d76ff0ec92e66cd5165aaaeec9c Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Sep 2024 11:24:54 -0700 Subject: [PATCH 2/9] Fix warnings --- Src/EB/AMReX_EB_STL_utils.H | 4 ++-- Src/EB/AMReX_EB_STL_utils.cpp | 18 +++++++++--------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Src/EB/AMReX_EB_STL_utils.H b/Src/EB/AMReX_EB_STL_utils.H index 9cd4b3e84d3..c47e30759f8 100644 --- a/Src/EB/AMReX_EB_STL_utils.H +++ b/Src/EB/AMReX_EB_STL_utils.H @@ -20,13 +20,13 @@ public: struct Triangle { XDim3 v1, v2, v3; - Real cent (int d) const + [[nodiscard]] Real cent (int d) const { static_assert(sizeof(XDim3) == sizeof(Real)*3); return Real(1./3.)*((&v1.x)[d] + (&v2.x)[d] + (&v3.x)[d]); } - std::pair minmax (int d) const + [[nodiscard]] std::pair minmax (int d) const { static_assert(sizeof(XDim3) == sizeof(Real)*3); return std::minmax({(&v1.x)[d], (&v2.x)[d], (&v3.x)[d]}); diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index 7b61e35556b..2fea79b0da1 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -126,11 +126,11 @@ namespace { // with the actual bounding box. if (a[idim] == b[idim]) { continue; } Real xi[] = {box.lo(idim), box.hi(idim)}; - for (int face = 0; face < 2; ++face) { - if (!((a[idim] > xi[face] && b[idim] > xi[face]) || - (a[idim] < xi[face] && b[idim] < xi[face]))) + for (auto xface : xi) { + if (!((a[idim] > xface && b[idim] > xface) || + (a[idim] < xface && b[idim] < xface))) { - Real w = (xi[face]-a[idim]) / (b[idim]-a[idim]); + Real w = (xface-a[idim]) / (b[idim]-a[idim]); bool inside = true; for (int jdim = 0; jdim < AMREX_SPACEDIM; ++jdim) { if (idim != jdim) { @@ -151,7 +151,7 @@ namespace { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void bvh_line_tri_intersects (Real a[3], Real b[3], STLtools::BVHNodeT const* root, - F&& f) + F const& f) { // Use stack to avoid recursion Stack nodes_to_do; @@ -172,7 +172,7 @@ namespace { } else { auto& ndone = nchildren_done.top(); if (ndone < node.nchildren) { - for (int ichild = ndone; ichild < node.nchildren; ++ichild) { + for (auto ichild = ndone; ichild < node.nchildren; ++ichild) { ++ndone; int inode = node.children[ichild]; if (line_box_intersects(a, b, root[inode].boundingbox)) { @@ -548,7 +548,7 @@ STLtools::build_bvh (Triangle* begin, Triangle* end, Gpu::PinnedVector& bv bbox.setLo(idim,bbox.lo(idim)-small); bbox.setHi(idim,bbox.hi(idim)+small); } - node.ntriangles = ntri; + node.ntriangles = int(ntri); return; } @@ -568,8 +568,8 @@ STLtools::build_bvh (Triangle* begin, Triangle* end, Gpu::PinnedVector& bv int nleft = ntri - tsize*nsplits; bvh_nodes.push_back(Node()); - bvh_nodes.back().nchildren = nsplits; - int this_node = bvh_nodes.size()-1; + bvh_nodes.back().nchildren = std::int8_t(nsplits); + auto this_node = bvh_nodes.size()-1; for (int isplit = 0; isplit < nsplits; ++isplit) { int tbegin, tend; From 3420c31b2789455e6876bafce9549b8ce054bbc4 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Sep 2024 12:24:26 -0700 Subject: [PATCH 3/9] Fix warning --- Src/EB/AMReX_EB_STL_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index 2fea79b0da1..9a1a38318ae 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -548,7 +548,7 @@ STLtools::build_bvh (Triangle* begin, Triangle* end, Gpu::PinnedVector& bv bbox.setLo(idim,bbox.lo(idim)-small); bbox.setHi(idim,bbox.hi(idim)+small); } - node.ntriangles = int(ntri); + node.ntriangles = int(ntri); // NOLINT return; } From d05d366d35610127258f719f7bd7269b2ca62ff0 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Sep 2024 12:28:44 -0700 Subject: [PATCH 4/9] Add reference --- Src/EB/AMReX_EB_STL_utils.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index 9a1a38318ae..5ba1f29fdc7 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -8,6 +8,8 @@ #include #include +// Reference for BVH: https://rmrsk.github.io/EBGeometry/Concepts.html#bounding-volume-hierarchies + namespace amrex { From 36429835ebf29e745c97806d858a3e49111d377f Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Sep 2024 14:02:14 -0700 Subject: [PATCH 5/9] minor --- Src/EB/AMReX_EB_STL_utils.H | 2 +- Src/EB/AMReX_EB_STL_utils.cpp | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/Src/EB/AMReX_EB_STL_utils.H b/Src/EB/AMReX_EB_STL_utils.H index c47e30759f8..d64c6512393 100644 --- a/Src/EB/AMReX_EB_STL_utils.H +++ b/Src/EB/AMReX_EB_STL_utils.H @@ -9,6 +9,7 @@ #include #include +#include #include namespace amrex @@ -59,7 +60,6 @@ public: static constexpr int mixedcells = 0; static constexpr int allcovered = 1; - // xxxxx yes, no, auto void setBVHOptimization (bool flag) { m_bvh_optimization = flag; } void read_stl_file (std::string const& fname, Real scale, Array const& center, diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index 5ba1f29fdc7..6529c3db83e 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -6,7 +6,6 @@ #include #include -#include // Reference for BVH: https://rmrsk.github.io/EBGeometry/Concepts.html#bounding-volume-hierarchies @@ -669,7 +668,6 @@ STLtools::fill (MultiFab& mf, IntVect const& nghost, Geometry const& geom, } ma[box_no](i,j,k) = (num_intersects % 2 == 0) ? reference_value : other_value; }); - Gpu::streamSynchronize(); } @@ -768,7 +766,6 @@ STLtools::getBoxType (Box const& box, Geometry const& geom, RunOn) const return (num_intersects % 2 == 0) ? ref_value : 1-ref_value; }); - ReduceTuple hv = reduce_data.value(reduce_op); Long nfluid = static_cast(amrex::get<0>(hv)); Long npts = box.numPts(); From e1d653fa8dcf13c4f01abf4c039e4d4d77776758 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Sep 2024 14:26:35 -0700 Subject: [PATCH 6/9] const --- Src/EB/AMReX_EB_STL_utils.cpp | 4 ++-- Src/EB/AMReX_EB_triGeomOps_K.H | 16 ++++++++-------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index 6529c3db83e..c27b5413dc4 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -28,7 +28,7 @@ namespace { // Does line ab intersect with the triangle? AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - bool line_tri_intersects (Real a[3], Real b[3], STLtools::Triangle const& tri) + bool line_tri_intersects (Real const a[3], Real const b[3], STLtools::Triangle const& tri) { if (amrex::max(a[0],b[0]) < amrex::min(tri.v1.x,tri.v2.x,tri.v3.x) || amrex::min(a[0],b[0]) > amrex::max(tri.v1.x,tri.v2.x,tri.v3.x) || @@ -150,7 +150,7 @@ namespace { template AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE - void bvh_line_tri_intersects (Real a[3], Real b[3], + void bvh_line_tri_intersects (Real const a[3], Real const b[3], STLtools::BVHNodeT const* root, F const& f) { diff --git a/Src/EB/AMReX_EB_triGeomOps_K.H b/Src/EB/AMReX_EB_triGeomOps_K.H index b753f04f5dd..7ab517efe9e 100644 --- a/Src/EB/AMReX_EB_triGeomOps_K.H +++ b/Src/EB/AMReX_EB_triGeomOps_K.H @@ -63,8 +63,8 @@ namespace amrex::tri_geom_ops L[5] = v2[1] - v1[1]; } //================================================================================ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void side_op3(Real v1[3],Real v2[3], - Real t1[3],Real t2[3],Real t3[3], + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void side_op3(const Real v1[3], const Real v2[3], + const Real t1[3], const Real t2[3], const Real t3[3], Real &S1, Real &S2, Real &S3) { @@ -81,8 +81,8 @@ namespace amrex::tri_geom_ops } //================================================================================ //get normal of triangle pointing at a test-point - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void tri_n(Real P1[3],Real P2[3],Real P3[3], - Real testp[3],Real n[3]) + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void tri_n(const Real P1[3], const Real P2[3], const Real P3[3], + const Real testp[3], Real n[3]) { Real v1[3],v2[3],magn; Real centr[3],c_tp_vec[3]; @@ -109,7 +109,7 @@ namespace amrex::tri_geom_ops n[2]=n[2]/magn; } //================================================================================ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real triangle_area(Real P1[3],Real P2[3],Real P3[3]) + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE Real triangle_area(const Real P1[3], const Real P2[3], const Real P3[3]) { Real v1[3],v2[3],area[3]; @@ -121,7 +121,7 @@ namespace amrex::tri_geom_ops //================================================================================ //this is only useful when v1-v2 segment intersects the triangle AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE bool find_intersection_point(const Real v1[3],const Real v2[3], - Real t1[3], Real t2[3], Real t3[3],Real ip[3],int bisect_iters=20,Real tol=1e-6) + const Real t1[3], const Real t2[3], const Real t3[3], Real ip[3],int bisect_iters=20,Real tol=1e-6) { Real plane_eq_mid,plane_eq1,plane_eq2; @@ -197,8 +197,8 @@ namespace amrex::tri_geom_ops return(all_ok); } //================================================================================ - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int lineseg_tri_intersect(Real v1[3],Real v2[3], - Real t1[3],Real t2[3],Real t3[3]) + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int lineseg_tri_intersect(const Real v1[3], const Real v2[3], + const Real t1[3], const Real t2[3], const Real t3[3]) { //see plucker coordinates based method //https://members.loria.fr/SLazard/ARC-Visi3D/Pant-project/files/Line_Triangle.html From 3ab7b9d64730d1e7c7e1d1e07a5f837715987175 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Sep 2024 16:21:00 -0700 Subject: [PATCH 7/9] reserve size --- Src/EB/AMReX_EB_STL_utils.H | 1 + Src/EB/AMReX_EB_STL_utils.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/Src/EB/AMReX_EB_STL_utils.H b/Src/EB/AMReX_EB_STL_utils.H index d64c6512393..828d5a120c2 100644 --- a/Src/EB/AMReX_EB_STL_utils.H +++ b/Src/EB/AMReX_EB_STL_utils.H @@ -109,6 +109,7 @@ private: Gpu::PinnedVector& a_tri_pts); static void build_bvh (Triangle* begin, Triangle * end, Gpu::PinnedVector& bvh_nodes); + static void bvh_size (int ntri, std::size_t& nnodes); }; } diff --git a/Src/EB/AMReX_EB_STL_utils.cpp b/Src/EB/AMReX_EB_STL_utils.cpp index c27b5413dc4..3a3070f188d 100644 --- a/Src/EB/AMReX_EB_STL_utils.cpp +++ b/Src/EB/AMReX_EB_STL_utils.cpp @@ -358,6 +358,9 @@ STLtools::prepare (Gpu::PinnedVector a_tri_pts) Gpu::PinnedVector bvh_nodes; if (m_bvh_optimization) { BL_PROFILE("STLtools::build_bvh"); + std::size_t nnodes = 0; + bvh_size(int(a_tri_pts.size()), nnodes); + bvh_nodes.reserve(nnodes); build_bvh(a_tri_pts.data(), a_tri_pts.data()+a_tri_pts.size(), bvh_nodes); #ifdef AMREX_USE_GPU m_bvh_nodes.resize(bvh_nodes.size()); @@ -600,6 +603,23 @@ STLtools::build_bvh (Triangle* begin, Triangle* end, Gpu::PinnedVector& bv } } +void +STLtools::bvh_size (int ntri, std::size_t& nnodes) +{ + ++nnodes; + + if (ntri <= m_bvh_max_size) { return; } // This is a leaf node + + int nsplits = std::min((ntri + (m_bvh_max_size-1)) / m_bvh_max_size, m_bvh_max_splits); + int tsize = ntri / nsplits; + int nleft = ntri - tsize*nsplits; + + for (int isplit = 0; isplit < nsplits; ++isplit) { + int child_size = (isplit < nleft) ? (tsize+1) : tsize; + bvh_size(child_size, nnodes); + } +} + void STLtools::fill (MultiFab& mf, IntVect const& nghost, Geometry const& geom, Real outside_value, Real inside_value) const From bddfa15416b1eb561834f76db07372bd16d46f73 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Sun, 8 Sep 2024 18:16:50 -0700 Subject: [PATCH 8/9] change ParmParse parameter to eb2.stl_use_bvh --- Src/EB/AMReX_EB2.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Src/EB/AMReX_EB2.cpp b/Src/EB/AMReX_EB2.cpp index 3684e87b473..4f7a5cb84b9 100644 --- a/Src/EB/AMReX_EB2.cpp +++ b/Src/EB/AMReX_EB2.cpp @@ -216,8 +216,8 @@ Build (const Geometry& geom, int required_coarsening_level, pp.queryAdd("stl_center", stl_center); bool stl_reverse_normal = false; pp.queryAdd("stl_reverse_normal", stl_reverse_normal); - bool stl_bvh_opt = true; - pp.queryAdd("stl_bvh_opt", stl_bvh_opt); + bool stl_use_bvh = true; + pp.queryAdd("stl_use_bvh", stl_use_bvh); IndexSpace::push(new IndexSpaceSTL(stl_file, stl_scale, // NOLINT(clang-analyzer-cplusplus.NewDeleteLeaks) {stl_center[0], stl_center[1], stl_center[2]}, int(stl_reverse_normal), @@ -226,7 +226,7 @@ Build (const Geometry& geom, int required_coarsening_level, build_coarse_level_by_coarsening, a_extend_domain_face, a_num_coarsen_opt, - stl_bvh_opt)); + stl_use_bvh)); } else { From 1e45bf057fd8985acc5f9f429699f43072d4857a Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 12 Sep 2024 19:06:46 -0700 Subject: [PATCH 9/9] Add suggestion to abort message --- Src/EB/AMReX_EB2_2D_C.cpp | 3 ++- Src/EB/AMReX_EB2_3D_C.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Src/EB/AMReX_EB2_2D_C.cpp b/Src/EB/AMReX_EB2_2D_C.cpp index b99b5559c77..231faf0cb88 100644 --- a/Src/EB/AMReX_EB2_2D_C.cpp +++ b/Src/EB/AMReX_EB2_2D_C.cpp @@ -342,7 +342,8 @@ int build_faces (Box const& bx, Array4 const& cell, nsmallfaces += *(hp+1); if (*hp > 0 && !cover_multiple_cuts) { - amrex::Abort("amrex::EB2::build_faces: more than 2 cuts not supported"); + amrex::Abort("amrex::EB2::build_faces: more than 2 cuts not supported. " + "You can try to fix it by using runtime parameter eb2.cover_multiple_cuts=1."); } return *hp; diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp index 2d02e53bdc7..ec7d643391e 100644 --- a/Src/EB/AMReX_EB2_3D_C.cpp +++ b/Src/EB/AMReX_EB2_3D_C.cpp @@ -768,7 +768,8 @@ int build_faces (Box const& bx, Array4 const& cell, } }); } else { - amrex::Abort("amrex::EB2::build_faces: more than 2 cuts not supported"); + amrex::Abort("amrex::EB2::build_faces: more than 2 cuts not supported. " + "You can try to fix it by using runtime parameter eb2.cover_multiple_cuts=1."); } } @@ -932,7 +933,8 @@ void build_cells (Box const& bx, Array4 const& cell, if (nsmallcells > 0 || nmulticuts > 0) { if (!cover_multiple_cuts && nmulticuts > 0) { - amrex::Abort("amrex::EB2::build_cells: multi-cuts not supported"); + amrex::Abort("amrex::EB2::build_cells: multi-cuts not supported. " + "You can try to fix it by using runtime parameter eb2.cover_multiple_cuts=1."); } return; } else {