diff --git a/.github/workflows/cuda.yml b/.github/workflows/cuda.yml index 50d45846f02..9e96aefac5e 100644 --- a/.github/workflows/cuda.yml +++ b/.github/workflows/cuda.yml @@ -154,6 +154,7 @@ jobs: -DAMReX_ENABLE_TESTS=ON \ -DAMReX_TEST_TYPE=Small \ -DAMReX_FORTRAN=ON \ + -DAMReX_FORTRAN_INTERFACES=ON \ -DAMReX_GPU_BACKEND=CUDA \ -DCMAKE_C_COMPILER=$(which nvc) \ -DCMAKE_CXX_COMPILER=$(which nvc++) \ diff --git a/.github/workflows/style/check_tabs.sh b/.github/workflows/style/check_tabs.sh index f4418644e3a..e8bb9f14bc3 100755 --- a/.github/workflows/style/check_tabs.sh +++ b/.github/workflows/style/check_tabs.sh @@ -20,7 +20,7 @@ find . -type d \( -name .git \ -a ! -name "*.lex.h" -a ! -name "*.lex.nolint.H" \) \ \) \ -exec grep -Iq . {} \; \ - -exec sed -i 's/\t/\ \ \ \ /g' {} + + -exec perl -i -pe's/\t/\ \ \ \ /g' {} + gitdiff=`git diff` diff --git a/.github/workflows/style/check_trailing_whitespaces.sh b/.github/workflows/style/check_trailing_whitespaces.sh index a0f941f8c2b..a4ece8a0ee4 100755 --- a/.github/workflows/style/check_trailing_whitespaces.sh +++ b/.github/workflows/style/check_trailing_whitespaces.sh @@ -20,7 +20,7 @@ find . -type d \( -name .git \ -a ! -name "*.lex.h" -a ! -name "*.lex.nolint.H" \) \ \) \ -exec grep -Iq . 
{} \; \ - -exec sed -i 's/[[:blank:]]\+$//g' {} + + -exec perl -i -pe's/[[:blank:]]+$//g' {} + gitdiff=`git diff` diff --git a/Docs/sphinx_documentation/source/EB.rst b/Docs/sphinx_documentation/source/EB.rst index 9b847a3259c..2c78856f937 100644 --- a/Docs/sphinx_documentation/source/EB.rst +++ b/Docs/sphinx_documentation/source/EB.rst @@ -271,6 +271,12 @@ following data: // embedded boundary centroid const MultiCutFab& getBndryCent () const; + // embedded boundary normal direction + const MultiCutFab& getBndryNormal () const; + + // embedded boundary surface area + const MultiCutFab& getBndryArea () const; + // area fractions Array getAreaFrac () const; @@ -291,6 +297,17 @@ following data: of the data is in the range of :math:`[-0.5,0.5]`, based on each cell's local coordinates with respect to the regular cell's center. +- **Boundary normal** is in a :cpp:`MultiCutFab` with ``AMREX_SPACEDIM`` + components representing the unit vector pointing toward the covered part. + +- **Boundary area** is in a :cpp:`MultiCutFab` with a single component + representing the dimensionless boundary area. When the cell is isotropic + (i.e., :math:`\Delta x = \Delta y = \Delta z`), it's trivial to convert it + to physical units. If the cell size is anisotropic, the conversion + requires multiplying by a factor of :math:`\sqrt{(n_x \Delta y \Delta + z)^2 + (n_y \Delta x \Delta z)^2 + (n_z \Delta x \Delta y)^2}`, where + :math:`n` is the boundary normal vector. + - **Face centroid** is in a :cpp:`MultiCutFab` with ``AMREX_SPACEDIM`` components. Each component of the data is in the range of :math:`[-0.5,0.5]`, based on each cell's local coordinates with respect to the embedded boundary. 
diff --git a/Docs/sphinx_documentation/source/RuntimeParameters.rst b/Docs/sphinx_documentation/source/RuntimeParameters.rst index c57c74c8fb1..4e9f4196804 100644 --- a/Docs/sphinx_documentation/source/RuntimeParameters.rst +++ b/Docs/sphinx_documentation/source/RuntimeParameters.rst @@ -1234,5 +1234,5 @@ enabled. If this parameter is empty, the output of tiny profiling is dumped on the default out stream of AMReX. If it's not empty, it specifies the file - name for the output. Note that ``/dev/null`` is a special name that mean - a null file. + name for the output. Note that ``/dev/null`` is a special name that means + no output. diff --git a/Src/Base/AMReX_Arena.H b/Src/Base/AMReX_Arena.H index 2a6cbb25a08..51b5d983fdf 100644 --- a/Src/Base/AMReX_Arena.H +++ b/Src/Base/AMReX_Arena.H @@ -4,9 +4,21 @@ #include #include + +#ifdef AMREX_TINY_PROFILING +#include +#else +namespace amrex { + struct MemStat {}; +} +#endif + #include #include #include +#include +#include +#include #include namespace amrex { @@ -156,7 +168,7 @@ public: * \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler. * \param memory_name The name of this arena in the TinyProfiler output. */ - virtual void registerForProfiling (const std::string& memory_name); + void registerForProfiling (const std::string& memory_name); #ifdef AMREX_USE_GPU //! Is this GPU stream ordered memory allocator? @@ -199,6 +211,29 @@ protected: virtual std::size_t freeUnused_protected () { return 0; } void* allocate_system (std::size_t nbytes); void deallocate_system (void* p, std::size_t nbytes); + + struct ArenaProfiler { + //! If this arena is profiled by TinyProfiler + bool m_do_profiling = false; + //! Mutex for the profiling + std::mutex m_arena_profiler_mutex; + //! Data structure used for profiling with TinyProfiler + std::map m_profiling_stats; + //! 
Track the currently allocated memory, not used by CArena + std::unordered_map> m_currently_allocated; + + ~ArenaProfiler (); + ArenaProfiler () noexcept = default; + ArenaProfiler (const ArenaProfiler& rhs) = delete; + ArenaProfiler (ArenaProfiler&& rhs) = delete; + ArenaProfiler& operator= (const ArenaProfiler& rhs) = delete; + ArenaProfiler& operator= (ArenaProfiler&& rhs) = delete; + + void profile_alloc (void* ptr, std::size_t nbytes); + + void profile_free (void* ptr); + + } m_profiler; }; } diff --git a/Src/Base/AMReX_Arena.cpp b/Src/Base/AMReX_Arena.cpp index ce4ece3b643..2320ab8c664 100644 --- a/Src/Base/AMReX_Arena.cpp +++ b/Src/Base/AMReX_Arena.cpp @@ -117,9 +117,13 @@ Arena::hasFreeDeviceMemory (std::size_t) } void -Arena::registerForProfiling (const std::string&) +Arena::registerForProfiling ([[maybe_unused]] const std::string& memory_name) { - amrex::Abort("Profiling is not implemented for this type of Arena"); +#ifdef AMREX_TINY_PROFILING + AMREX_ALWAYS_ASSERT(m_profiler.m_do_profiling == false); + m_profiler.m_do_profiling = + TinyProfiler::RegisterArena(memory_name, m_profiler.m_profiling_stats); +#endif } std::size_t @@ -330,6 +334,7 @@ Arena::Initialize () } the_async_arena = new PArena(the_async_arena_release_threshold); + the_async_arena->registerForProfiling("Async Memory"); #ifdef AMREX_USE_GPU if (the_arena->isDevice()) { @@ -403,6 +408,7 @@ Arena::Initialize () } the_cpu_arena = The_BArena(); + the_cpu_arena->registerForProfiling("Cpu Memory"); // Initialize the null arena auto* null_arena = The_Null_Arena(); @@ -654,4 +660,46 @@ The_Comms_Arena () } } +#ifdef AMREX_TINY_PROFILING + +Arena::ArenaProfiler::~ArenaProfiler () +{ + if (m_do_profiling) { + TinyProfiler::DeregisterArena(m_profiling_stats); + } +} + +#else + +Arena::ArenaProfiler::~ArenaProfiler () = default; + +#endif + +void Arena::ArenaProfiler::profile_alloc ([[maybe_unused]] void* ptr, + [[maybe_unused]] std::size_t nbytes) { +#ifdef AMREX_TINY_PROFILING + if (m_do_profiling) 
{ + std::lock_guard lock(m_arena_profiler_mutex); + MemStat* stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats); + if (stat) { + m_currently_allocated.insert({ptr, {stat, nbytes}}); + } + } +#endif +} + +void Arena::ArenaProfiler::profile_free ([[maybe_unused]] void* ptr) { +#ifdef AMREX_TINY_PROFILING + if (m_do_profiling) { + std::lock_guard lock(m_arena_profiler_mutex); + auto it = m_currently_allocated.find(ptr); + if (it != m_currently_allocated.end()) { + auto [stat, nbytes] = it->second; + TinyProfiler::memory_free(nbytes, stat); + m_currently_allocated.erase(it); + } + } +#endif +} + } diff --git a/Src/Base/AMReX_BArena.cpp b/Src/Base/AMReX_BArena.cpp index c22affa687a..054e64b854c 100644 --- a/Src/Base/AMReX_BArena.cpp +++ b/Src/Base/AMReX_BArena.cpp @@ -3,12 +3,15 @@ void* amrex::BArena::alloc (std::size_t sz_) { - return std::malloc(sz_); + void* pt = std::malloc(sz_); + m_profiler.profile_alloc(pt, sz_); + return pt; } void amrex::BArena::free (void* pt) { + m_profiler.profile_free(pt); std::free(pt); } diff --git a/Src/Base/AMReX_CArena.H b/Src/Base/AMReX_CArena.H index 9547bc92f21..bc46d008241 100644 --- a/Src/Base/AMReX_CArena.H +++ b/Src/Base/AMReX_CArena.H @@ -16,8 +16,6 @@ namespace amrex { -struct MemStat; - /** * \brief A Concrete Class for Dynamic Memory Management using first fit. * This is a coalescing memory manager. It allocates (possibly) large @@ -75,12 +73,6 @@ public: */ [[nodiscard]] bool hasFreeDeviceMemory (std::size_t sz) final; - /** - * \brief Add this Arena to the list of Arenas that are profiled by TinyProfiler. - * \param memory_name The name of this arena in the TinyProfiler output. - */ - void registerForProfiling (const std::string& memory_name) final; - //! The current amount of heap space used by the CArena object. std::size_t heap_space_used () const noexcept; @@ -191,10 +183,6 @@ protected: std::size_t m_used{0}; //! The amount of memory given out via alloc(). std::size_t m_actually_used{0}; - //! 
If this arena is profiled by TinyProfiler - bool m_do_profiling = false; - //! Data structure used for profiling with TinyProfiler - std::map m_profiling_stats; std::mutex carena_mutex; diff --git a/Src/Base/AMReX_CArena.cpp b/Src/Base/AMReX_CArena.cpp index 42987f47a86..bc5297f4d52 100644 --- a/Src/Base/AMReX_CArena.cpp +++ b/Src/Base/AMReX_CArena.cpp @@ -5,14 +5,6 @@ #include #include -#ifdef AMREX_TINY_PROFILING -#include -#else -namespace amrex { - struct MemStat {}; -} -#endif - #include #include #include @@ -32,12 +24,6 @@ CArena::~CArena () for (auto const& a : m_alloc) { deallocate_system(a.first, a.second); } - -#ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { - TinyProfiler::DeregisterArena(m_profiling_stats); - } -#endif } void* @@ -53,8 +39,8 @@ CArena::alloc_protected (std::size_t nbytes) { MemStat* stat = nullptr; #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { - stat = TinyProfiler::memory_alloc(nbytes, m_profiling_stats); + if (m_profiler.m_do_profiling) { + stat = TinyProfiler::memory_alloc(nbytes, m_profiler.m_profiling_stats); } #endif @@ -173,10 +159,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax) free_node.size(left_size); } #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { + if (m_profiler.m_do_profiling) { TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat()); auto* stat = TinyProfiler::memory_alloc(new_size, - m_profiling_stats); + m_profiler.m_profiling_stats); const_cast(*busy_it).mem_stat(stat); } #endif @@ -186,10 +172,10 @@ CArena::alloc_in_place (void* pt, std::size_t szmin, std::size_t szmax) } else if (total_size >= szmin) { m_freelist.erase(next_it); #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { + if (m_profiler.m_do_profiling) { TinyProfiler::memory_free(busy_it->size(), busy_it->mem_stat()); auto* stat = TinyProfiler::memory_alloc(total_size, - m_profiling_stats); + m_profiler.m_profiling_stats); const_cast(*busy_it).mem_stat(stat); } #endif @@ -255,9 +241,9 @@ 
CArena::shrink_in_place (void* pt, std::size_t new_size) m_actually_used -= leftover_size; #ifdef AMREX_TINY_PROFILING - if (m_do_profiling) { + if (m_profiler.m_do_profiling) { TinyProfiler::memory_free(old_size, busy_it->mem_stat()); - auto* stat = TinyProfiler::memory_alloc(new_size, m_profiling_stats); + auto* stat = TinyProfiler::memory_alloc(new_size, m_profiler.m_profiling_stats); const_cast(*busy_it).mem_stat(stat); } #endif @@ -431,15 +417,6 @@ CArena::hasFreeDeviceMemory (std::size_t sz) } } -void -CArena::registerForProfiling ([[maybe_unused]] const std::string& memory_name) -{ -#ifdef AMREX_TINY_PROFILING - m_do_profiling = true; - TinyProfiler::RegisterArena(memory_name, m_profiling_stats); -#endif -} - std::size_t CArena::heap_space_used () const noexcept { diff --git a/Src/Base/AMReX_CTOParallelForImpl.H b/Src/Base/AMReX_CTOParallelForImpl.H index 35e0ec3be7b..8f7e8ce567f 100644 --- a/Src/Base/AMReX_CTOParallelForImpl.H +++ b/Src/Base/AMReX_CTOParallelForImpl.H @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include @@ -18,7 +18,7 @@ namespace amrex { template struct CompileTimeOptions { - // TypeList is defined in AMReX_Tuple.H + // TypeList is defined in AMReX_TypeList.H using list_type = TypeList...>; }; @@ -26,65 +26,42 @@ struct CompileTimeOptions { namespace detail { - template - std::enable_if_t || std::is_same_v, bool> - ParallelFor_helper2 (T const& N, F const& f, TypeList, - std::array const& runtime_options) - { - if (runtime_options == std::array{As::value...}) { - if constexpr (std::is_integral_v) { - ParallelFor(N, [f] AMREX_GPU_DEVICE (T i) noexcept - { - f(i, As{}...); - }); - } else { - ParallelFor(N, [f] AMREX_GPU_DEVICE (int i, int j, int k) noexcept - { - f(i, j, k, As{}...); - }); - } - return true; - } else { - return false; + template + struct CTOWrapper { + F f; + + template + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + auto operator() (Args... 
args) const noexcept + -> decltype(f(args..., std::integral_constant{}...)) { + return f(args..., std::integral_constant{}...); } - } - template - std::enable_if_t, bool> - ParallelFor_helper2 (Box const& box, T ncomp, F const& f, TypeList, - std::array const& runtime_options) + AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE + static constexpr + std::array GetOptions () noexcept { + return {ctr...}; + } + }; + + template + bool + AnyCTO_helper2 (const L& l, const F& f, TypeList, + std::array const& runtime_options) { if (runtime_options == std::array{As::value...}) { - ParallelFor(box, ncomp, [f] AMREX_GPU_DEVICE (int i, int j, int k, T n) noexcept - { - f(i, j, k, n, As{}...); - }); + l(CTOWrapper{f}); return true; } else { return false; } } - template - std::enable_if_t || std::is_same_v> - ParallelFor_helper1 (T const& N, F const& f, TypeList, - RO const& runtime_options) - { - bool found_option = (false || ... || - ParallelFor_helper2(N, f, - PPs{}, runtime_options)); - amrex::ignore_unused(found_option); - AMREX_ASSERT(found_option); - } - - template - std::enable_if_t> - ParallelFor_helper1 (Box const& box, T ncomp, F const& f, TypeList, - RO const& runtime_options) + template + void + AnyCTO_helper1 (const L& l, const F& f, TypeList, RO const& runtime_options) { - bool found_option = (false || ... || - ParallelFor_helper2(box, ncomp, f, - PPs{}, runtime_options)); + bool found_option = (false || ... || AnyCTO_helper2(l, f, PPs{}, runtime_options)); amrex::ignore_unused(found_option); AMREX_ASSERT(found_option); } @@ -92,51 +69,161 @@ namespace detail #endif -template -std::enable_if_t> -ParallelFor (TypeList /*list_of_compile_time_options*/, +/** + * \brief Compile time optimization of kernels with run time options. + * + * This is a generalized version of ParallelFor with CTOs that can support any function that + * takes in one lambda to launch a GPU kernel such as ParallelFor, ParallelForRNG, launch, etc. 
+ * It uses fold expression to generate kernel launches for all combinations + * of the run time options. The kernel function can use constexpr if to + * discard unused code blocks for better run time performance. In the + * example below, the code will be expanded into 4*2=8 normal ParallelForRNGs + * for all combinations of the run time parameters. + \verbatim + int A_runtime_option = ...; + int B_runtime_option = ...; + enum A_options : int { A0, A1, A2, A3 }; + enum B_options : int { B0, B1 }; + AnyCTO(TypeList, + CompileTimeOptions>{}, + {A_runtime_option, B_runtime_option}, + [&](auto cto_func){ + ParallelForRNG(N, cto_func); + }, + [=] AMREX_GPU_DEVICE (int i, const RandomEngine& engine, + auto A_control, auto B_control) + { + ... + if constexpr (A_control.value == A0) { + ... + } else if constexpr (A_control.value == A1) { + ... + } else if constexpr (A_control.value == A2) { + ... + } else { + ... + } + if constexpr (A_control.value != A3 && B_control.value == B1) { + ... + } + ... + } + ); + + constexpr int nthreads_per_block = ...; + int nblocks = ...; + AnyCTO(TypeList, + CompileTimeOptions>{}, + {A_runtime_option, B_runtime_option}, + [&](auto cto_func){ + launch(nblocks, Gpu::gpuStream(), cto_func); + }, + [=] AMREX_GPU_DEVICE (auto A_control, auto B_control){ + ... + } + ); + \endverbatim + * The static member function cto_func.GetOptions() can be used to obtain the runtime_options + * passed into AnyCTO, but at compile time. This enables some advanced use cases, + * such as changing the number of threads per block or the dimensionality of ParallelFor at runtime. + * For the second example -> decltype(void(intvect.size())) is necessary to + * disambiguate IntVectND<1> and int for the first argument of the kernel function. 
+ \verbatim + int nthreads_per_block = ...; + AnyCTO(TypeList>{}, + {nthreads_per_block}, + [&](auto cto_func){ + constexpr std::array ctos = cto_func.GetOptions(); + constexpr int c_nthreads_per_block = ctos[0]; + ParallelFor(N, cto_func); + }, + [=] AMREX_GPU_DEVICE (int i, auto){ + ... + } + ); + + BoxND<6> box6D = ...; + int dims_needed = ...; + AnyCTO(TypeList>{}, + {dims_needed}, + [&](auto cto_func){ + constexpr std::array ctos = cto_func.GetOptions(); + constexpr int c_dims_needed = ctos[0]; + const auto box = BoxShrink(box6D); + ParallelFor(box, cto_func); + }, + [=] AMREX_GPU_DEVICE (auto intvect, auto) -> decltype(void(intvect.size())) { + ... + } + ); + \endverbatim + + * Note that due to a limitation of CUDA's extended device lambda, the + * constexpr if block cannot be the one that captures a variable first. + * If nvcc complains about it, you will have to manually capture it outside + * constexpr if. Alternatively, the constexpr if can be replaced with a regular if. + * Compilers can still perform the same optimizations since the condition is known at compile time. + * The data type for the parameters is int. + * + * \param list_of_compile_time_options list of all possible values of the parameters. + * \param runtime_options the run time parameters. + * \param l a callable object containing a CPU function that launches the provided GPU kernel. + * \param f a callable object containing the GPU kernel with optimizations. 
+ */ +template +void AnyCTO ([[maybe_unused]] TypeList list_of_compile_time_options, std::array const& runtime_options, - T N, F&& f) + L&& l, F&& f) { #if (__cplusplus >= 201703L) - detail::ParallelFor_helper1(N, std::forward(f), - CartesianProduct(typename CTOs::list_type{}...), - runtime_options); + detail::AnyCTO_helper1(std::forward(l), std::forward(f), + CartesianProduct(typename CTOs::list_type{}...), + runtime_options); #else - amrex::ignore_unused(N, f, runtime_options); + amrex::ignore_unused(runtime_options, l, f); static_assert(std::is_integral::value, "This requires C++17"); #endif } -template -void ParallelFor (TypeList /*list_of_compile_time_options*/, +template +std::enable_if_t> +ParallelFor (TypeList ctos, + std::array const& runtime_options, + T N, F&& f) +{ + AnyCTO(ctos, runtime_options, + [&](auto cto_func){ + ParallelFor(N, cto_func); + }, + std::forward(f) + ); +} + +template +void ParallelFor (TypeList ctos, std::array const& runtime_options, - Box const& box, F&& f) + BoxND const& box, F&& f) { -#if (__cplusplus >= 201703L) - detail::ParallelFor_helper1(box, std::forward(f), - CartesianProduct(typename CTOs::list_type{}...), - runtime_options); -#else - amrex::ignore_unused(box, f, runtime_options); - static_assert(std::is_integral::value, "This requires C++17"); -#endif + AnyCTO(ctos, runtime_options, + [&](auto cto_func){ + ParallelFor(box, cto_func); + }, + std::forward(f) + ); } -template +template std::enable_if_t> -ParallelFor (TypeList /*list_of_compile_time_options*/, +ParallelFor (TypeList ctos, std::array const& runtime_options, - Box const& box, T ncomp, F&& f) + BoxND const& box, T ncomp, F&& f) { -#if (__cplusplus >= 201703L) - detail::ParallelFor_helper1(box, ncomp, std::forward(f), - CartesianProduct(typename CTOs::list_type{}...), - runtime_options); -#else - amrex::ignore_unused(box, ncomp, f, runtime_options); - static_assert(std::is_integral::value, "This requires C++17"); -#endif + AnyCTO(ctos, runtime_options, + 
[&](auto cto_func){ + ParallelFor(box, ncomp, cto_func); + }, + std::forward(f) + ); } /** @@ -164,7 +251,7 @@ ParallelFor (TypeList /*list_of_compile_time_options*/, ... } else if constexpr (A_control.value == A2) { ... - else { + } else { ... } if constexpr (A_control.value != A3 && B_control.value == B1) { @@ -218,7 +305,7 @@ ParallelFor (TypeList ctos, ... } else if constexpr (A_control.value == A2) { ... - else { + } else { ... } if constexpr (A_control.value != A3 && B_control.value == B1) { @@ -237,10 +324,10 @@ ParallelFor (TypeList ctos, * \param box a Box specifying the 3D for loop's range. * \param f a callable object taking three integers and working on the given cell. */ -template +template void ParallelFor (TypeList ctos, std::array const& option, - Box const& box, F&& f) + BoxND const& box, F&& f) { ParallelFor(ctos, option, box, std::forward(f)); } @@ -271,7 +358,7 @@ void ParallelFor (TypeList ctos, ... } else if constexpr (A_control.value == A2) { ... - else { + } else { ... } if constexpr (A_control.value != A3 && B_control.value == B1) { @@ -291,11 +378,11 @@ void ParallelFor (TypeList ctos, * \param ncomp an integer specifying the range for iteration over components. * \param f a callable object taking three integers and working on the given cell. 
*/ -template +template std::enable_if_t> ParallelFor (TypeList ctos, std::array const& option, - Box const& box, T ncomp, F&& f) + BoxND const& box, T ncomp, F&& f) { ParallelFor(ctos, option, box, ncomp, std::forward(f)); } diff --git a/Src/Base/AMReX_DistributionMapping.cpp b/Src/Base/AMReX_DistributionMapping.cpp index cfe16b346a1..45304cf5e04 100644 --- a/Src/Base/AMReX_DistributionMapping.cpp +++ b/Src/Base/AMReX_DistributionMapping.cpp @@ -651,7 +651,7 @@ knapsack (const std::vector& wgts, if (efficiency < max_efficiency && do_full_knapsack && wblv.size() > 1 && wblv.begin()->size() > 1) { - BL_PROFILE_VAR("knapsack()swap", swap); + BL_PROFILE("knapsack()swap"); top: ; if (efficiency < max_efficiency && wblv.begin()->size() > 1) @@ -980,7 +980,7 @@ DistributionMapping::KnapSackProcessorMap (const DistributionMapping& olddm, new_efficiency = avg_weight / max_weight; if (new_efficiency < max_efficiency && wblv.size() > 1) { - BL_PROFILE_VAR("knapsack()swap", swap); + BL_PROFILE("knapsack()swap"); std::sort(wblv.begin(), wblv.end()); diff --git a/Src/Base/AMReX_GpuPrint.H b/Src/Base/AMReX_GpuPrint.H index 9e029ea8835..37f74f30bfc 100644 --- a/Src/Base/AMReX_GpuPrint.H +++ b/Src/Base/AMReX_GpuPrint.H @@ -2,8 +2,6 @@ #define AMREX_GPU_PRINT_H_ #include -#if !defined(__APPLE__) - #include #ifdef AMREX_USE_SYCL @@ -23,7 +21,9 @@ # define AMREX_DEVICE_PRINTF(...) std::printf(__VA_ARGS__); #elif defined(AMREX_USE_HIP) # define AMREX_DEVICE_PRINTF(...) ::printf(__VA_ARGS__); +#else +# define AMREX_DEVICE_PRINTF(...) { \ + std::printf(__VA_ARGS__); } #endif -#endif // !defined(__APPLE__) #endif // AMREX_GPU_PRINT_H_ diff --git a/Src/Base/AMReX_IntVect.H b/Src/Base/AMReX_IntVect.H index 6452f29b036..64457f123ed 100644 --- a/Src/Base/AMReX_IntVect.H +++ b/Src/Base/AMReX_IntVect.H @@ -55,8 +55,11 @@ public: static constexpr unsigned shift1 = sizeof(size_t)>=8 ? 20 : 10; static constexpr unsigned shift2 = sizeof(size_t)>=8 ?
40 : 20; if constexpr (dim == 1) { + amrex::ignore_unused(shift1); + amrex::ignore_unused(shift2); return static_cast(vec[0]); } else if constexpr (dim == 2) { + amrex::ignore_unused(shift2); return static_cast(vec[0]) ^ (static_cast(vec[1]) << shift1); } else if constexpr (dim == 3) { diff --git a/Src/Base/AMReX_PArena.cpp b/Src/Base/AMReX_PArena.cpp index 36155f3d32c..bbe2717ab07 100644 --- a/Src/Base/AMReX_PArena.cpp +++ b/Src/Base/AMReX_PArena.cpp @@ -62,6 +62,7 @@ PArena::alloc (std::size_t nbytes) AMREX_HIP_SAFE_CALL(hipMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaMallocAsync(&p, nbytes, m_pool, Gpu::gpuStream())); ) + m_profiler.profile_alloc(p, nbytes); return p; } else #endif @@ -93,6 +94,7 @@ PArena::free (void* p) #if defined (AMREX_GPU_STREAM_ALLOC_SUPPORT) if (Gpu::Device::memoryPoolsSupported()) { + m_profiler.profile_free(p); AMREX_HIP_OR_CUDA( AMREX_HIP_SAFE_CALL(hipFreeAsync(p, Gpu::gpuStream()));, AMREX_CUDA_SAFE_CALL(cudaFreeAsync(p, Gpu::gpuStream())); diff --git a/Src/Base/AMReX_ParmParse.H b/Src/Base/AMReX_ParmParse.H index cc9588793da..274858ed253 100644 --- a/Src/Base/AMReX_ParmParse.H +++ b/Src/Base/AMReX_ParmParse.H @@ -60,6 +60,9 @@ class RealVect; // '\n's. The "FILE = " definition is special. Rather than just // adding this entry to the database, it reads the contents of // into the database. +// For CI/CD workflows and out-of-source tests, the environment variable +// AMREX_INPUTS_FILE_PREFIX can be set to prefix every FILE = +// with a custom path. // // ParmParse stores all entries in a static table which is built the // first time a ParmParse object is constructed (usually in main()). 
@@ -1192,14 +1195,18 @@ public: */ template , std::enable_if_t = 0> - int query (const char* name, T& ref) + int query (const char* name, T& ref, int ival = FIRST) const { std::string s; - int exist = this->query(name, s); + int exist = this->query(name, s, ival); if (exist) { try { ref = amrex::getEnum(s); } catch (...) { + if (amrex::Verbose() > 0 ) { + amrex::Print() << "amrex::ParmParse::query (input name: " + << this->prefixedName(name) << "):\n"; + } throw; } } @@ -1216,13 +1223,17 @@ public: */ template , std::enable_if_t = 0> - void get (const char* name, T& ref) + void get (const char* name, T& ref, int ival = FIRST) const { std::string s; - this->get(name, s); + this->get(name, s, ival); try { ref = amrex::getEnum(s); } catch (...) { + if (amrex::Verbose() > 0 ) { + amrex::Print() << "amrex::ParmParse::get (input name: " + << this->prefixedName(name) << "):\n"; + } throw; } } @@ -1230,14 +1241,25 @@ public: //! Query an array of enum values using given name. template , std::enable_if_t = 0> - int queryarr (const char* name, std::vector& ref) + int queryarr (const char* name, + std::vector& ref, + int start_ix = FIRST, + int num_val = ALL) const { std::vector s; - int exist = this->queryarr(name, s); + int exist = this->queryarr(name, s, start_ix, num_val); if (exist) { ref.resize(s.size()); for (std::size_t i = 0; i < s.size(); ++i) { - ref[i] = amrex::getEnum(s[i]); + try { + ref[i] = amrex::getEnum(s[i]); + } catch (...) { + if (amrex::Verbose() > 0 ) { + amrex::Print() << "amrex::ParmParse::queryarr (input name: " + << this->prefixedName(name) << "):\n"; + } + throw; + } } } return exist; @@ -1246,13 +1268,24 @@ public: //! Get an array of enum values using given name. 
template , std::enable_if_t = 0> - void getarr (const char* name, std::vector& ref) + void getarr (const char* name, + std::vector& ref, + int start_ix = FIRST, + int num_val = ALL) const { std::vector s; - this->getarr(name, s); + this->getarr(name, s, start_ix, num_val); ref.resize(s.size()); for (std::size_t i = 0; i < s.size(); ++i) { - ref[i] = amrex::getEnum(s[i]); + try { + ref[i] = amrex::getEnum(s[i]); + } catch (...) { + if (amrex::Verbose() > 0 ) { + amrex::Print() << "amrex::ParmParse::getarr (input name: " + << this->prefixedName(name) << "):\n"; + } + throw; + } } } @@ -1268,10 +1301,10 @@ public: */ template , std::enable_if_t = 0> - int query_enum_case_insensitive (const char* name, T& ref) + int query_enum_case_insensitive (const char* name, T& ref, int ival = FIRST) const { std::string s; - int exist = this->query(name, s); + int exist = this->query(name, s, ival); if (exist) { s = amrex::toLower(s); auto const& enum_names = amrex::getEnumNameStrings(); @@ -1303,9 +1336,9 @@ public: */ template , std::enable_if_t = 0> - void get_enum_case_insensitive (const char* name, T& ref) + void get_enum_case_insensitive (const char* name, T& ref, int ival = FIRST) const { - int exist = this->query_enum_case_insensitive(name, ref); + int exist = this->query_enum_case_insensitive(name, ref, ival); if (!exist) { std::string msg("get_enum_case_insensitive(\""); msg.append(name).append("\",").append(amrex::getEnumClassName()) @@ -1387,10 +1420,10 @@ public: static std::string ParserPrefix; -protected: - [[nodiscard]] std::string prefixedName (const std::string_view& str) const; +protected: + std::string m_prefix; // Prefix used in keyword search std::string m_parser_prefix; // Prefix used by Parser Table* m_table; diff --git a/Src/Base/AMReX_ParmParse.cpp b/Src/Base/AMReX_ParmParse.cpp index df1e18e9b08..767bd2a4d0d 100644 --- a/Src/Base/AMReX_ParmParse.cpp +++ b/Src/Base/AMReX_ParmParse.cpp @@ -9,6 +9,7 @@ #include #include +#include #include #include #include 
@@ -407,6 +408,19 @@ read_file (const char* fname, ParmParse::Table& tab) // if ( fname != nullptr && fname[0] != 0 ) { + std::string filename = fname; + + // optional prefix to search files in + char const *amrex_inputs_file_prefix_c = std::getenv("AMREX_INPUTS_FILE_PREFIX"); + if (amrex_inputs_file_prefix_c != nullptr) { + // we expect a directory path as the prefix: append a trailing "/" if missing + auto amrex_inputs_file_prefix = std::string(amrex_inputs_file_prefix_c); + if (amrex_inputs_file_prefix.back() != '/') { + amrex_inputs_file_prefix += "/"; + } + filename = amrex_inputs_file_prefix + filename; + } + #ifdef AMREX_USE_MPI if (ParallelDescriptor::Communicator() == MPI_COMM_NULL) { @@ -415,7 +429,6 @@ read_file (const char* fname, ParmParse::Table& tab) #endif Vector fileCharPtr; - std::string filename = fname; ParallelDescriptor::ReadAndBcastFile(filename, fileCharPtr); std::istringstream is(fileCharPtr.data()); @@ -1283,7 +1296,7 @@ ParmParse::query (const char* name, } void -ParmParse::add (const char* name, +ParmParse::add (const char* name, // NOLINT(readability-make-member-function-const) const bool val) { saddval(prefixedName(name),val); @@ -1315,7 +1328,7 @@ ParmParse::query (const char* name, int& ref, int ival) const } void -ParmParse::add (const char* name, const int val) +ParmParse::add (const char* name, const int val) // NOLINT(readability-make-member-function-const) { saddval(prefixedName(name),val); } @@ -1349,7 +1362,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, } void -ParmParse::addarr (const char* name, const std::vector& ref) +ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } @@ -1381,7 +1394,7 @@ ParmParse::query (const char* name, long& ref, int ival) const } void -ParmParse::add (const char* name, +ParmParse::add (const char* name, // NOLINT(readability-make-member-function-const) const long val) { 
saddval(prefixedName(name),val); @@ -1416,7 +1429,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, } void -ParmParse::addarr (const char* name, const std::vector& ref) +ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } @@ -1447,7 +1460,7 @@ ParmParse::query (const char* name, long long& ref, int ival) const } void -ParmParse::add (const char* name, const long long val) +ParmParse::add (const char* name, const long long val) // NOLINT(readability-make-member-function-const) { saddval(prefixedName(name),val); } @@ -1481,7 +1494,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, int start_ix } void -ParmParse::addarr (const char* name, const std::vector& ref) +ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } @@ -1512,7 +1525,7 @@ ParmParse::query (const char* name, float& ref, int ival) const } void -ParmParse::add (const char* name, const float val) +ParmParse::add (const char* name, const float val) // NOLINT(readability-make-member-function-const) { saddval(prefixedName(name),val); } @@ -1546,7 +1559,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, } void -ParmParse::addarr (const char* name, const std::vector& ref) +ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } @@ -1579,7 +1592,7 @@ ParmParse::query (const char* name, double& ref, int ival) const } void -ParmParse::add (const char* name, const double val) +ParmParse::add (const char* name, const double val) // NOLINT(readability-make-member-function-const) { saddval(prefixedName(name),val); } @@ -1613,7 +1626,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, int start_ix, } void -ParmParse::addarr (const char* name, const std::vector& ref) 
+ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } @@ -1646,7 +1659,7 @@ ParmParse::query (const char* name, std::string& ref, int ival) const } void -ParmParse::add (const char* name, const std::string& val) +ParmParse::add (const char* name, const std::string& val) // NOLINT(readability-make-member-function-const) { saddval(prefixedName(name),val); } @@ -1680,7 +1693,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, } void -ParmParse::addarr (const char* name, const std::vector& ref) +ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } @@ -1713,7 +1726,7 @@ ParmParse::query (const char* name, IntVect& ref, int ival) const } void -ParmParse::add (const char* name, const IntVect& val) +ParmParse::add (const char* name, const IntVect& val) // NOLINT(readability-make-member-function-const) { saddval(prefixedName(name),val); } @@ -1747,7 +1760,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, } void -ParmParse::addarr (const char* name, const std::vector& ref) +ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } @@ -1778,7 +1791,7 @@ ParmParse::query (const char* name, Box& ref, int ival) const } void -ParmParse::add (const char* name, const Box& val) +ParmParse::add (const char* name, const Box& val) // NOLINT(readability-make-member-function-const) { saddval(prefixedName(name),val); } @@ -1812,7 +1825,7 @@ ParmParse::queryarr (const char* name, std::vector& ref, } void -ParmParse::addarr (const char* name, const std::vector& ref) +ParmParse::addarr (const char* name, const std::vector& ref) // NOLINT(readability-make-member-function-const) { saddarr(prefixedName(name),ref); } diff --git a/Src/Base/AMReX_Stack.H b/Src/Base/AMReX_Stack.H new file mode 100644 
index 00000000000..8e9e83f8682 --- /dev/null +++ b/Src/Base/AMReX_Stack.H @@ -0,0 +1,24 @@ +#ifndef AMREX_STACK_H_ +#define AMREX_STACK_H_ + +namespace amrex { + +template +struct Stack +{ +public: + constexpr void push (T v) { m_data[m_size++] = v; } + constexpr void pop () { --m_size; } + [[nodiscard]] constexpr bool empty () const { return m_size == 0; } + [[nodiscard]] constexpr int size () const { return m_size; } + [[nodiscard]] constexpr T const& top () const { return m_data[m_size-1]; } + [[nodiscard]] constexpr T & top () { return m_data[m_size-1]; } + [[nodiscard]] constexpr T operator[] (int i) const { return m_data[i]; } +private: + T m_data[N]; + int m_size = 0; +}; + +} + +#endif diff --git a/Src/Base/AMReX_TinyProfiler.H b/Src/Base/AMReX_TinyProfiler.H index 0228949beb1..3c7f2da7758 100644 --- a/Src/Base/AMReX_TinyProfiler.H +++ b/Src/Base/AMReX_TinyProfiler.H @@ -57,7 +57,7 @@ public: static void MemoryInitialize () noexcept; static void MemoryFinalize (bool bFlushing = false) noexcept; - static void RegisterArena (const std::string& memory_name, + static bool RegisterArena (const std::string& memory_name, std::map& memstats) noexcept; static void DeregisterArena (std::map& memstats) noexcept; @@ -143,6 +143,7 @@ private: static bool memprof_enabled; static std::string output_file; + static std::string const& get_output_file (); static void PrintStats (std::map& regstats, double dt_max, std::ostream* os); static void PrintMemStats (std::map& memstats, diff --git a/Src/Base/AMReX_TinyProfiler.cpp b/Src/Base/AMReX_TinyProfiler.cpp index 4360cf7671f..22bcaa61733 100644 --- a/Src/Base/AMReX_TinyProfiler.cpp +++ b/Src/Base/AMReX_TinyProfiler.cpp @@ -57,6 +57,8 @@ std::string TinyProfiler::output_file; namespace { constexpr char mainregion[] = "main"; + bool finalized = false; + bool memprof_finalized = false; } TinyProfiler::TinyProfiler (std::string funcname) noexcept @@ -319,23 +321,14 @@ TinyProfiler::Initialize () noexcept 
pp.queryAdd("print_threshold", print_threshold); pp.queryAdd("enabled", enabled); - pp.queryAdd("output_file", output_file); } if (!enabled) { return; } - if (ParallelDescriptor::IOProcessor()) { - static bool first = true; - if (first && !output_file.empty() && output_file != "/dev/null") { - if (FileSystem::Exists(output_file)) { - FileSystem::Remove(output_file); - } - first = false; - } - } - regionstack.emplace_back(mainregion); t_init = amrex::second(); + + finalized = false; } void @@ -353,6 +346,8 @@ TinyProfiler::MemoryInitialize () noexcept #ifdef AMREX_USE_OMP mem_stack_thread_private.resize(omp_get_max_threads()); #endif + + memprof_finalized = false; } void @@ -360,7 +355,6 @@ TinyProfiler::Finalize (bool bFlushing) noexcept { if (!enabled) { return; } - static bool finalized = false; if (!bFlushing) { // If flushing, don't make this the last time! if (finalized) { return; @@ -388,12 +382,13 @@ TinyProfiler::Finalize (bool bFlushing) noexcept std::ofstream ofs; std::ostream* os = nullptr; if (ParallelDescriptor::IOProcessor()) { - if (output_file.empty()) { + auto const& ofile = get_output_file(); + if (ofile.empty()) { os = &(amrex::OutStream()); - } else if (output_file != "/dev/null") { - ofs.open(output_file, std::ios_base::app); + } else if (ofile != "/dev/null") { + ofs.open(ofile, std::ios_base::app); if (!ofs.is_open()) { - amrex::Error("TinyProfiler failed to open "+output_file); + amrex::Error("TinyProfiler failed to open "+ofile); } os = static_cast(&ofs); } @@ -440,6 +435,12 @@ TinyProfiler::Finalize (bool bFlushing) noexcept } } } + + if (!bFlushing) { + regionstack.clear(); + ttstack.clear(); + statsmap.clear(); + } } void @@ -449,12 +450,11 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept // This function must be called BEFORE the profiled arenas are deleted - static bool finalized = false; if (!bFlushing) { // If flushing, don't make this the last time! 
- if (finalized) { + if (memprof_finalized) { return; } else { - finalized = true; + memprof_finalized = true; } } @@ -467,12 +467,13 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept std::ostream* os = nullptr; std::streamsize oldprec = 0; if (ParallelDescriptor::IOProcessor()) { - if (output_file.empty()) { + auto const& ofile = get_output_file(); + if (ofile.empty()) { os = &(amrex::OutStream()); - } else if (output_file != "/dev/null") { - ofs.open(output_file, std::ios_base::app); + } else if (ofile != "/dev/null") { + ofs.open(ofile, std::ios_base::app); if (!ofs.is_open()) { - amrex::Error("TinyProfiler failed to open "+output_file); + amrex::Error("TinyProfiler failed to open "+ofile); } os = static_cast(&ofs); } @@ -490,14 +491,15 @@ TinyProfiler::MemoryFinalize (bool bFlushing) noexcept if(os) { os->precision(oldprec); } } -void +bool TinyProfiler::RegisterArena (const std::string& memory_name, std::map& memstats) noexcept { - if (!memprof_enabled) { return; } + if (!memprof_enabled) { return false; } all_memstats.push_back(&memstats); all_memnames.push_back(memory_name); + return true; } void @@ -954,4 +956,28 @@ TinyProfiler::PrintCallStack (std::ostream& os) } } +std::string const& +TinyProfiler::get_output_file () +{ + // Instead of reading it only once, we could try to read the parameter + // every time. But I am not sure how useful that might be. 
+ static bool first = true; + if (first) { + first = false; + + amrex::ParmParse pp("tiny_profiler"); + pp.query("output_file", output_file); + + if (ParallelDescriptor::IOProcessor()) { + if (!output_file.empty() && output_file != "/dev/null") { + if (FileSystem::Exists(output_file)) { + FileSystem::Remove(output_file); + } + } + } + } + + return output_file; +} + } diff --git a/Src/Base/AMReX_iMultiFab.H b/Src/Base/AMReX_iMultiFab.H index eb1e350433a..39896975b7b 100644 --- a/Src/Base/AMReX_iMultiFab.H +++ b/Src/Base/AMReX_iMultiFab.H @@ -164,6 +164,11 @@ public: */ [[nodiscard]] Long sum (int comp, int nghost = 0, bool local = false) const; + /** + * \brief Returns the sum of component "comp" in the given "region". -- no ghost cells are included. + */ + [[nodiscard]] Long sum (Box const& region, int comp = 0, bool local = false) const; + /** * \brief Adds the scalar value val to the value of each cell in the * specified subregion of the iMultiFab. The subregion consists diff --git a/Src/Base/AMReX_iMultiFab.cpp b/Src/Base/AMReX_iMultiFab.cpp index 090f1ca96d6..c78e3b4a367 100644 --- a/Src/Base/AMReX_iMultiFab.cpp +++ b/Src/Base/AMReX_iMultiFab.cpp @@ -431,6 +431,51 @@ iMultiFab::sum (int comp, int nghost, bool local) const return sm; } +Long +iMultiFab::sum (Box const& region, int comp, bool local) const +{ + BL_PROFILE("iMultiFab::sum(region)"); + + Long sm = 0; + +#ifdef AMREX_USE_GPU + if (Gpu::inLaunchRegion()) + { + auto const& ma = this->const_arrays(); + sm = ParReduce(TypeList{}, TypeList{}, *this, IntVect(0), + [=] AMREX_GPU_DEVICE (int box_no, int i, int j, int k) noexcept -> GpuTuple + { + return (region.contains(i,j,k)) ? 
static_cast(ma[box_no](i,j,k,comp)) : Long(0); + }); + } + else +#endif + { +#ifdef AMREX_USE_OMP +#pragma omp parallel if (!system::regtest_reduction) reduction(+:sm) +#endif + for (MFIter mfi(*this,true); mfi.isValid(); ++mfi) + { + const Box& bx = mfi.tilebox() & region; + if (bx.ok()) { + Array4 const& fab = this->const_array(mfi); + auto tmp = Long(0); + AMREX_LOOP_3D(bx, i, j, k, + { + tmp += fab(i,j,k,comp); + }); + sm += tmp; + } + } + } + + if (!local) { + ParallelAllReduce::Sum(sm, ParallelContext::CommunicatorSub()); + } + + return sm; +} + namespace { IntVect diff --git a/Src/Base/AMReX_parmparse_mod.F90 b/Src/Base/AMReX_parmparse_mod.F90 index 2d33b4da40d..e7a8748503b 100644 --- a/Src/Base/AMReX_parmparse_mod.F90 +++ b/Src/Base/AMReX_parmparse_mod.F90 @@ -241,6 +241,10 @@ subroutine amrex_parmparse_add_stringarr (pp, name, v, n) bind(c) end subroutine amrex_parmparse_add_stringarr end interface + interface amrex_parmparse_destroy + module procedure amrex_parmparse_destroy + end interface amrex_parmparse_destroy + contains subroutine amrex_parmparse_build (pp, name) diff --git a/Src/Base/CMakeLists.txt b/Src/Base/CMakeLists.txt index 1ee0b236122..882f401228a 100644 --- a/Src/Base/CMakeLists.txt +++ b/Src/Base/CMakeLists.txt @@ -31,6 +31,7 @@ foreach(D IN LISTS AMReX_SPACEDIM) AMReX_parmparse_fi.cpp AMReX_ParmParse.H AMReX_Functional.H + AMReX_Stack.H AMReX_String.H AMReX_String.cpp AMReX_Utility.H diff --git a/Src/Base/Make.package b/Src/Base/Make.package index a8c1d0faaa1..c64fa50f11d 100644 --- a/Src/Base/Make.package +++ b/Src/Base/Make.package @@ -23,6 +23,8 @@ C$(AMREX_BASE)_sources += AMReX_PODVector.cpp C$(AMREX_BASE)_headers += AMReX_BlockMutex.H C$(AMREX_BASE)_sources += AMReX_BlockMutex.cpp +C$(AMREX_BASE)_headers += AMReX_Stack.H + C$(AMREX_BASE)_headers += AMReX_String.H C$(AMREX_BASE)_sources += AMReX_String.cpp diff --git a/Src/Base/Parser/AMReX_IParser_Exe.H b/Src/Base/Parser/AMReX_IParser_Exe.H index e7e41c44d48..a68fa32981e 100644 --- 
a/Src/Base/Parser/AMReX_IParser_Exe.H +++ b/Src/Base/Parser/AMReX_IParser_Exe.H @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -226,24 +227,12 @@ struct alignas(8) IParserExeJUMP { int offset; }; -template -struct IParserStack -{ - long long m_data[N]; - int m_size = 0; - constexpr void push (long long v) { m_data[m_size++] = v; } - constexpr void pop () { --m_size; } - [[nodiscard]] constexpr long long const& top () const { return m_data[m_size-1]; } - [[nodiscard]] constexpr long long & top () { return m_data[m_size-1]; } - [[nodiscard]] constexpr long long operator[] (int i) const { return m_data[i]; } -}; - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE long long iparser_exe_eval (const char* p, long long const* x) { if (p == nullptr) { return std::numeric_limits::max(); } - IParserStack pstack; + Stack pstack; while (*((iparser_exe_t*)p) != IPARSER_EXE_NULL) { switch (*((iparser_exe_t*)p)) { diff --git a/Src/Base/Parser/AMReX_Parser_Exe.H b/Src/Base/Parser/AMReX_Parser_Exe.H index 37a0b89da78..a5427e6e65c 100644 --- a/Src/Base/Parser/AMReX_Parser_Exe.H +++ b/Src/Base/Parser/AMReX_Parser_Exe.H @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -217,24 +218,12 @@ struct alignas(8) ParserExeJUMP { int offset; }; -template -struct ParserStack -{ - double m_data[N]; - int m_size = 0; - constexpr void push (double v) { m_data[m_size++] = v; } - constexpr void pop () { --m_size; } - [[nodiscard]] constexpr double const& top () const { return m_data[m_size-1]; } - [[nodiscard]] constexpr double & top () { return m_data[m_size-1]; } - [[nodiscard]] constexpr double operator[] (int i) const { return m_data[i]; } -}; - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE double parser_exe_eval (const char* p, double const* x) { if (p == nullptr) { return std::numeric_limits::max(); } - ParserStack pstack; + Stack pstack; while (*((parser_exe_t*)p) != PARSER_EXE_NULL) { // NOLINT switch (*((parser_exe_t*)p)) { diff --git a/Src/EB/AMReX_EB2_2D_C.cpp 
b/Src/EB/AMReX_EB2_2D_C.cpp index b2bbde200c5..b99b5559c77 100644 --- a/Src/EB/AMReX_EB2_2D_C.cpp +++ b/Src/EB/AMReX_EB2_2D_C.cpp @@ -30,8 +30,7 @@ void set_eb_data (const int i, const int j, const Real apnorm = std::hypot(daxp,dayp) + 1.e-30_rt*std::sqrt(dx[0]*dx[1]); const Real nx = daxp * (1.0_rt/apnorm); const Real ny = dayp * (1.0_rt/apnorm); - const Real bareascaling = std::sqrt( (nx*dx[0])*(nx*dx[0]) + - (ny*dx[1])*(ny*dx[1]) ); + const Real bareascaling = std::sqrt(Math::powi<2>(nx*dx[1]) + Math::powi<2>(ny*dx[0])); const Real nxabs = std::abs(nx); const Real nyabs = std::abs(ny); diff --git a/Src/EB/AMReX_EB2_3D_C.cpp b/Src/EB/AMReX_EB2_3D_C.cpp index 73170adaeeb..2d02e53bdc7 100644 --- a/Src/EB/AMReX_EB2_3D_C.cpp +++ b/Src/EB/AMReX_EB2_3D_C.cpp @@ -101,7 +101,9 @@ void set_eb_data (const int i, const int j, const int k, bnorm(i,j,k,0) = nx; bnorm(i,j,k,1) = ny; bnorm(i,j,k,2) = nz; - barea(i,j,k) = (nx*dapx/(dx[1]*dx[2]) + ny*dapy/(dx[0]*dx[2]) + nz*dapz/(dx[0]*dx[1])); + barea(i,j,k) = (nx*dapx + ny*dapy + nz*dapz) / std::sqrt(Math::powi<2>(nx*dx[1]*dx[2]) + + Math::powi<2>(ny*dx[0]*dx[2]) + + Math::powi<2>(nz*dx[0]*dx[1])); Real aax = 0.5_rt*(axm+axp); Real aay = 0.5_rt*(aym+ayp); diff --git a/Src/EB/AMReX_EB2_IndexSpace_STL.cpp b/Src/EB/AMReX_EB2_IndexSpace_STL.cpp index 662aaf14dd6..70e3b492d82 100644 --- a/Src/EB/AMReX_EB2_IndexSpace_STL.cpp +++ b/Src/EB/AMReX_EB2_IndexSpace_STL.cpp @@ -83,9 +83,12 @@ IndexSpaceSTL::getGeometry (const Box& dom) const } void -IndexSpaceSTL::addFineLevels (int /*num_new_fine_levels*/) +IndexSpaceSTL::addFineLevels (int num_new_fine_levels) { - amrex::Abort("IndexSpaceSTL::addFineLevels: todo"); + // This function is a no op if not adding levels, otherwise TODO + if (num_new_fine_levels > 0) { + amrex::Abort("IndexSpaceSTL::addFineLevels: todo"); + } } void diff --git a/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp index cd811d73688..966b5c424e1 100644 --- 
a/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp +++ b/Src/EB/AMReX_EB2_IndexSpace_chkpt_file.cpp @@ -78,9 +78,12 @@ IndexSpaceChkptFile::getGeometry (const Box& dom) const } void -IndexSpaceChkptFile::addFineLevels (int /*num_new_fine_levels*/) +IndexSpaceChkptFile::addFineLevels (int num_new_fine_levels) { - amrex::Abort("IndexSpaceChkptFile::addFineLevels: not supported"); + // This function is a no op if not adding levels, otherwise TODO + if (num_new_fine_levels > 0) { + amrex::Abort("IndexSpaceChkptFile::addFineLevels: not supported"); + } } void diff --git a/Src/F_Interfaces/AmrCore/AMReX_fluxregister_mod.F90 b/Src/F_Interfaces/AmrCore/AMReX_fluxregister_mod.F90 index 3e40c9cd517..21937b57b10 100644 --- a/Src/F_Interfaces/AmrCore/AMReX_fluxregister_mod.F90 +++ b/Src/F_Interfaces/AmrCore/AMReX_fluxregister_mod.F90 @@ -103,6 +103,10 @@ subroutine amrex_fi_fluxregister_overwrite (fr, flxs, scale, geom) bind(c) end subroutine amrex_fi_fluxregister_overwrite end interface + interface amrex_fluxregister_destroy + module procedure amrex_fluxregister_destroy + end interface amrex_fluxregister_destroy + contains subroutine amrex_fluxregister_build (fr, ba, dm, ref_ratio, fine_lev, ncomp) diff --git a/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 b/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 index b156a8ae996..0181c6cfb9c 100644 --- a/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_boxarray_mod.F90 @@ -45,6 +45,10 @@ module amrex_boxarray_module module procedure amrex_boxarray_print end interface amrex_print + interface amrex_boxarray_destroy + module procedure amrex_boxarray_destroy + end interface amrex_boxarray_destroy + ! 
interfaces to cpp functions interface diff --git a/Src/F_Interfaces/Base/AMReX_distromap_mod.F90 b/Src/F_Interfaces/Base/AMReX_distromap_mod.F90 index 26316ffab21..adbb91b4421 100644 --- a/Src/F_Interfaces/Base/AMReX_distromap_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_distromap_mod.F90 @@ -34,6 +34,10 @@ module amrex_distromap_module module procedure amrex_distromap_print end interface amrex_print + interface amrex_distromap_destroy + module procedure amrex_distromap_destroy + end interface amrex_distromap_destroy + ! interfaces to cpp functions interface diff --git a/Src/F_Interfaces/Base/AMReX_fab_mod.F90 b/Src/F_Interfaces/Base/AMReX_fab_mod.F90 index d36e8f4ea87..b76e8645ea3 100644 --- a/Src/F_Interfaces/Base/AMReX_fab_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_fab_mod.F90 @@ -42,6 +42,10 @@ module amrex_fab_module module procedure amrex_fab_build_install end interface amrex_fab_build + interface amrex_fab_destroy + module procedure amrex_fab_destroy + end interface amrex_fab_destroy + contains ! 
Build a fab, allocate own memory diff --git a/Src/F_Interfaces/Base/AMReX_geometry_mod.F90 b/Src/F_Interfaces/Base/AMReX_geometry_mod.F90 index d65a8f6d980..7492b80c7b2 100644 --- a/Src/F_Interfaces/Base/AMReX_geometry_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_geometry_mod.F90 @@ -76,6 +76,10 @@ subroutine amrex_fi_geometry_get_intdomain (geom,lo,hi) bind(c) end subroutine amrex_fi_geometry_get_intdomain end interface + interface amrex_geometry_destroy + module procedure amrex_geometry_destroy + end interface amrex_geometry_destroy + contains subroutine amrex_geometry_finalize () diff --git a/Src/F_Interfaces/Base/AMReX_multifab_mod.F90 b/Src/F_Interfaces/Base/AMReX_multifab_mod.F90 index 81ce03b9f3d..93f2c4be628 100644 --- a/Src/F_Interfaces/Base/AMReX_multifab_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_multifab_mod.F90 @@ -101,6 +101,10 @@ module amrex_multifab_module module procedure amrex_multifab_build_a end interface amrex_multifab_build + interface amrex_multifab_destroy + module procedure amrex_multifab_destroy + end interface amrex_multifab_destroy + type, public :: amrex_imultifab logical :: owner = .false. type (c_ptr) :: p = c_null_ptr @@ -129,6 +133,10 @@ module amrex_multifab_module module procedure amrex_imultifab_build_a end interface amrex_imultifab_build + interface amrex_imultifab_destroy + module procedure amrex_imultifab_destroy + end interface amrex_imultifab_destroy + type, public :: amrex_mfiter type(c_ptr) :: p = c_null_ptr integer ,private :: counter = -1 @@ -159,6 +167,10 @@ module amrex_multifab_module module procedure amrex_mfiter_build_badm_s end interface amrex_mfiter_build + interface amrex_mfiter_destroy + module procedure amrex_mfiter_destroy + end interface amrex_mfiter_destroy + ! 
interfaces to c++ functions interface diff --git a/Src/F_Interfaces/Base/AMReX_multifabutil_fi.cpp b/Src/F_Interfaces/Base/AMReX_multifabutil_fi.cpp index d59c2d02053..61d0385276b 100644 --- a/Src/F_Interfaces/Base/AMReX_multifabutil_fi.cpp +++ b/Src/F_Interfaces/Base/AMReX_multifabutil_fi.cpp @@ -5,12 +5,18 @@ using namespace amrex; extern "C" { void amrex_fi_average_down (const MultiFab* S_fine, MultiFab* S_crse, - const Geometry* fgeom, const Geometry* cgeom, - int scomp, int ncomp, int rr) + const Geometry* fgeom, const Geometry* cgeom, + int scomp, int ncomp, int rr) { amrex::average_down(*S_fine, *S_crse, *fgeom, *cgeom, scomp, ncomp, rr); } + void amrex_fi_average_down_cell_node (const MultiFab* S_fine, MultiFab* S_crse, + int scomp, int ncomp, int rr) + { + amrex::average_down(*S_fine, *S_crse, scomp, ncomp, rr); + } + void amrex_fi_average_down_faces (MultiFab const* fmf[], MultiFab* cmf[], Geometry const* cgeom, int scomp, int ncomp, int rr) diff --git a/Src/F_Interfaces/Base/AMReX_multifabutil_mod.F90 b/Src/F_Interfaces/Base/AMReX_multifabutil_mod.F90 index 9575b217a73..643f661649f 100644 --- a/Src/F_Interfaces/Base/AMReX_multifabutil_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_multifabutil_mod.F90 @@ -8,7 +8,11 @@ module amrex_multifabutil_module implicit none private - public :: amrex_average_down, amrex_average_down_faces, amrex_average_cellcenter_to_face + public :: amrex_average_down, & ! volume weighted average down of cell data + & amrex_average_down_cell, & ! average down of cell data + & amrex_average_down_node, & ! average down of nodal data + & amrex_average_down_faces, & ! average down of face data + & amrex_average_cellcenter_to_face ! 
average from cell centers to faces interface subroutine amrex_fi_average_down (fmf, cmf, fgeom, cgeom, scomp, ncomp, rr) bind(c) @@ -18,6 +22,13 @@ subroutine amrex_fi_average_down (fmf, cmf, fgeom, cgeom, scomp, ncomp, rr) bind integer(c_int), value :: scomp, ncomp, rr end subroutine amrex_fi_average_down + subroutine amrex_fi_average_down_cell_node (fmf, cmf, scomp, ncomp, rr) bind(c) + import + implicit none + type(c_ptr), value :: fmf, cmf + integer(c_int), value :: scomp, ncomp, rr + end subroutine amrex_fi_average_down_cell_node + subroutine amrex_fi_average_down_faces (fmf, cmf, cgeom, scomp, ncomp, rr) bind(c) import implicit none @@ -45,6 +56,19 @@ subroutine amrex_average_down (fmf, cmf, fgeom, cgeom, scomp, ncomp, rr) call amrex_fi_average_down(fmf%p, cmf%p, fgeom%p, cgeom%p, scomp-1, ncomp, rr) end subroutine amrex_average_down + subroutine amrex_average_down_cell (fmf, cmf, scomp, ncomp, rr) + type(amrex_multifab), intent(in ) :: fmf + type(amrex_multifab), intent(inout) :: cmf + integer, intent(in) :: scomp, ncomp, rr + call amrex_fi_average_down_cell_node(fmf%p, cmf%p, scomp-1, ncomp, rr) + end subroutine amrex_average_down_cell + + subroutine amrex_average_down_node (fmf, cmf, scomp, ncomp, rr) + type(amrex_multifab), intent(in ) :: fmf + type(amrex_multifab), intent(inout) :: cmf + integer, intent(in) :: scomp, ncomp, rr + call amrex_fi_average_down_cell_node(fmf%p, cmf%p, scomp-1, ncomp, rr) + end subroutine amrex_average_down_node subroutine amrex_average_down_faces (fmf, cmf, cgeom, scomp, ncomp, rr) type(amrex_multifab), intent(in ) :: fmf(amrex_spacedim) diff --git a/Src/F_Interfaces/Base/AMReX_physbc_mod.F90 b/Src/F_Interfaces/Base/AMReX_physbc_mod.F90 index 6831fe81514..1f707575a83 100644 --- a/Src/F_Interfaces/Base/AMReX_physbc_mod.F90 +++ b/Src/F_Interfaces/Base/AMReX_physbc_mod.F90 @@ -47,6 +47,10 @@ subroutine amrex_fi_delete_physbc (pbc) bind(c) end subroutine amrex_fi_delete_physbc end interface + interface amrex_physbc_destroy + 
module procedure amrex_physbc_destroy + end interface amrex_physbc_destroy + contains subroutine amrex_physbc_build (pbc, fill, geom) diff --git a/Src/F_Interfaces/LinearSolvers/AMReX_abeclaplacian_mod.F90 b/Src/F_Interfaces/LinearSolvers/AMReX_abeclaplacian_mod.F90 index 5c9425168ec..d90a2faeae4 100644 --- a/Src/F_Interfaces/LinearSolvers/AMReX_abeclaplacian_mod.F90 +++ b/Src/F_Interfaces/LinearSolvers/AMReX_abeclaplacian_mod.F90 @@ -58,6 +58,10 @@ subroutine amrex_fi_abeclap_set_bcoeffs (abeclap, amrlev, beta) bind(c) end subroutine amrex_fi_abeclap_set_bcoeffs end interface + interface amrex_abeclaplacian_destroy + module procedure amrex_abeclaplacian_destroy + end interface amrex_abeclaplacian_destroy + contains subroutine amrex_abeclaplacian_assign (dst, src) diff --git a/Src/F_Interfaces/LinearSolvers/AMReX_multigrid_mod.F90 b/Src/F_Interfaces/LinearSolvers/AMReX_multigrid_mod.F90 index bc7ae734d01..5ea57e5196a 100644 --- a/Src/F_Interfaces/LinearSolvers/AMReX_multigrid_mod.F90 +++ b/Src/F_Interfaces/LinearSolvers/AMReX_multigrid_mod.F90 @@ -154,6 +154,10 @@ subroutine amrex_fi_multigrid_set_final_fill_bc (mg, f) bind(c) end subroutine amrex_fi_multigrid_set_final_fill_bc end interface + interface amrex_multigrid_destroy + module procedure amrex_multigrid_destroy + end interface amrex_multigrid_destroy + contains subroutine amrex_multigrid_assign (dst, src) diff --git a/Src/F_Interfaces/LinearSolvers/AMReX_poisson_mod.F90 b/Src/F_Interfaces/LinearSolvers/AMReX_poisson_mod.F90 index 3690419f22d..33a02336af1 100644 --- a/Src/F_Interfaces/LinearSolvers/AMReX_poisson_mod.F90 +++ b/Src/F_Interfaces/LinearSolvers/AMReX_poisson_mod.F90 @@ -35,6 +35,10 @@ subroutine amrex_fi_delete_linop (linop) bind(c) end subroutine amrex_fi_delete_linop end interface + interface amrex_poisson_destroy + module procedure amrex_poisson_destroy + end interface amrex_poisson_destroy + contains subroutine amrex_poisson_assign (dst, src) diff --git 
a/Src/F_Interfaces/Particle/AMReX_particlecontainer_mod.F90 b/Src/F_Interfaces/Particle/AMReX_particlecontainer_mod.F90 index bdb4c9a2310..4def2e000f9 100644 --- a/Src/F_Interfaces/Particle/AMReX_particlecontainer_mod.F90 +++ b/Src/F_Interfaces/Particle/AMReX_particlecontainer_mod.F90 @@ -163,6 +163,10 @@ end subroutine amrex_fi_num_particles_i end interface + interface amrex_particlecontainer_destroy + module procedure amrex_particlecontainer_destroy + end interface amrex_particlecontainer_destroy + contains subroutine amrex_particlecontainer_build (pc, amrcore) diff --git a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H index 9b56b8049df..7b5fb069423 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLABecLaplacian.H @@ -1281,7 +1281,7 @@ MLABecLaplacianT::supportNSolve () const bool support = false; if (this->m_overset_mask[0][0]) { if (this->m_geom[0].back().Domain().coarsenable(MLLinOp::mg_coarsen_ratio, - MLLinOp::mg_domain_min_width) + this->mg_domain_min_width) && this->m_grids[0].back().coarsenable(MLLinOp::mg_coarsen_ratio, MLLinOp::mg_box_min_width)) { support = true; diff --git a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H index 3cc623b761e..ce6a8b53335 100644 --- a/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H +++ b/Src/LinearSolvers/MLMG/AMReX_MLLinOp.H @@ -570,11 +570,7 @@ protected: static constexpr int mg_coarsen_ratio = 2; static constexpr int mg_box_min_width = 2; -#ifdef AMREX_USE_EB - static constexpr int mg_domain_min_width = 4; -#else - static constexpr int mg_domain_min_width = 2; -#endif + int mg_domain_min_width = 2; LPInfo info; @@ -803,6 +799,15 @@ MLLinOpT::defineGrids (const Vector& a_geom, { BL_PROFILE("MLLinOp::defineGrids()"); +#ifdef AMREX_USE_EB + if ( ! 
a_factory.empty() ) { + auto const* ebf = dynamic_cast(a_factory[0]); + if (ebf && !(ebf->isAllRegular())) { // Has non-trivial EB + mg_domain_min_width = 4; + } + } +#endif + m_num_amr_levels = 0; for (int amrlev = 0; amrlev < a_geom.size(); amrlev++) { if (!a_grids[amrlev].empty()) {