diff --git a/CHANGES.md b/CHANGES.md index 8a5be8752f..7b7e78a0da 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,35 @@ +# 24.08 + + * lazy QueueReduction has been enabled for the timing diagnostics + (#2926) + + * The `job_info` file output now correctly labels the compute time + as GPU-hours instead of CPU-hours when running on GPUs (#2930) + + * We can now output warnings when running on GPUs if you build + with `USE_GPU_PRINTF=TRUE` (#2923, #2928) + + * Code clean-ups / sync with Microphysics (#2900, #2901, #2905, + #2906, #2909, #2912, #2919, #2922, #2932, #2933, #2936, #2938, + #2940) + + * The area weighting in the diagnostics in `subch_planar` was fixed + (#2885) + + * A script to produce a resolution study for the `circular_det` + problem was added (#2857) + + * science problem updates: `xrb_layered` (#2917), `nova` (#2913), + `wdmerger` (#2907, #2918, #2931), `Detonation` (#2902) + + * updated scaling results on Frontier (#2904, #2914, #2915) + + * more exact Riemann solver clean-up (#2896, #2897, #2898) + and clean-ups to the two shock solvers (#2895) + + * fix issues with eigenvectors and clang-tidy in the MHD solver + (#2880) + # 24.07 * Reorganizing of the existing 2-shock and HLL Riemann solvers diff --git a/Docs/source/faq.rst b/Docs/source/faq.rst index 2d806224a2..2b979926ba 100644 --- a/Docs/source/faq.rst +++ b/Docs/source/faq.rst @@ -17,31 +17,46 @@ Compiling There are 2 things you can do to check what’s happening. First, inspect the directories in ``VPATH_LOCATIONS``. This can be done via: - :: + .. prompt:: bash - make print-VPATH_LOCATIONS + make print-VPATH_LOCATIONS Next, ask make to tell you where it is finding each of the source files. This is done through a script ``find_files_vpath.py`` that is hooked into Castro’s build system. You can run this as: - :: + .. prompt:: bash - make file_locations + make file_locations At the end of the report, it will list any files it cannot find in - the vpath. 
Some of these are to be expected (like ``extern.f90`` - and ``buildInfo.cpp``—these are written at compile-time. But any - other missing files need to be investigated. + the vpath. Some of these are to be expected (like + ``buildInfo.cpp``—these are written at compile-time). But any other + missing files need to be investigated. + +#. *I put a copy of one of the header files (e.g. ``problem_tagging.H``) + in my problem setup but it does not seem to be recognized / used by + the build system. Why doesn't my executable use my custom version + of the header?* + + This is likely due to compiler caching / ccache. You need to + clear the cache and the build: + + .. prompt:: bash + + ccache -C + make clean + + Then rebuild and it should be recognized. #. *I’m still having trouble compiling. How can I find out what all of the make variables are set to?* Use: - :: + .. prompt:: bash - make help + make help This will tell you the value of all the compilers and their options. @@ -104,7 +119,7 @@ Debugging Given a MultiFab ``mf``, you can dump out the state as: - :: + .. code:: c++ print_state(mf, IntVect(AMREX_D_DECL(10, 20, 30))); @@ -119,7 +134,7 @@ Debugging You can simply output a FAB to ``std::cout``. Imagine that you are in an MFIter loop, with a MultiFab ``mf``: - :: + .. code:: c++ S = FArrayBox& mf[mfi]; std::cout << S << std::endl; @@ -143,9 +158,9 @@ Profiling When you run, a file named ``gmon.out`` will be produced. This can be processed with gprof by running: - :: + .. prompt:: bash - gprof exec-name + gprof exec-name where *exec-name* is the name of the executable. More detailed line-by-line information can be obtained by passing the -l @@ -159,9 +174,9 @@ Managing Runs Create a file called ``dump_and_continue``, e.g., as: - :: + .. prompt:: bash - touch dump_and_continue + touch dump_and_continue This will force the code to output a checkpoint file that can be used to restart. 
Other options are ``plot_and_continue`` to output @@ -193,9 +208,9 @@ Managing Runs The build information (including git hashes, modules, EoS, network, etc.) can be displayed by running the executable as - :: + .. prompt:: bash - ./Castro.exe --describe + ./Castro.exe --describe .. _ch:faq:vis: diff --git a/Exec/Make.Castro b/Exec/Make.Castro index 5b1575820b..106ef51cc0 100644 --- a/Exec/Make.Castro +++ b/Exec/Make.Castro @@ -55,6 +55,9 @@ endif # Require C++17 CXXSTD := c++17 +# Use Lazy QueueReduction for the timing outputs +LAZY := TRUE + # default integrator INTEGRATOR_DIR ?= VODE diff --git a/Exec/hydro_tests/Noh/problem_bc_fill.H b/Exec/hydro_tests/Noh/problem_bc_fill.H index 3a6470744f..ea58ac7f35 100644 --- a/Exec/hydro_tests/Noh/problem_bc_fill.H +++ b/Exec/hydro_tests/Noh/problem_bc_fill.H @@ -29,7 +29,7 @@ void problem_bc_fill(int i, int j, int k, eos_t zone_state; zone_state.rho = rho_init * std::pow(1.0_rt + time / r, AMREX_SPACEDIM - 1); - zone_state.p = pres_init * std::pow(zone_state.rho / rho_init, 1.0_rt + eos_gamma); + zone_state.p = pres_init * std::pow(zone_state.rho / rho_init, 1.0_rt + eos_rp::eos_gamma); for (int n = 0; n < NumSpec; ++n) { zone_state.xn[n] = 1.0_rt / static_cast(NumSpec); } diff --git a/Exec/hydro_tests/RT/problem_initialize_state_data.H b/Exec/hydro_tests/RT/problem_initialize_state_data.H index 5c76f05ba3..ef1867125e 100644 --- a/Exec/hydro_tests/RT/problem_initialize_state_data.H +++ b/Exec/hydro_tests/RT/problem_initialize_state_data.H @@ -29,13 +29,13 @@ void problem_initialize_state_data (int i, int j, int k, if (r[AMREX_SPACEDIM-1] < problem::split[AMREX_SPACEDIM-1]) { Real pres = problem::p0_base - problem::rho_1 * r[AMREX_SPACEDIM-1]; - state(i,j,k,UEDEN) = pres / (eos_gamma - 1.0_rt); - state(i,j,k,UEINT) = pres / (eos_gamma - 1.0_rt); + state(i,j,k,UEDEN) = pres / (eos_rp::eos_gamma - 1.0_rt); + state(i,j,k,UEINT) = pres / (eos_rp::eos_gamma - 1.0_rt); } else { Real pres = presmid - problem::rho_2 * 
(r[AMREX_SPACEDIM-1] - problem::split[AMREX_SPACEDIM-1]); - state(i,j,k,UEDEN) = pres / (eos_gamma - 1.0_rt); - state(i,j,k,UEINT) = pres / (eos_gamma - 1.0_rt); + state(i,j,k,UEDEN) = pres / (eos_rp::eos_gamma - 1.0_rt); + state(i,j,k,UEINT) = pres / (eos_rp::eos_gamma - 1.0_rt); } Real pertheight; diff --git a/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize.H b/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize.H index fcb2e675d8..ac7925209c 100644 --- a/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize.H +++ b/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize.H @@ -13,11 +13,11 @@ void problem_initialize () // Define rho_0 - problem::rho_0 = std::pow(problem::p_ref, 1.0_rt/eos_gamma); + problem::rho_0 = std::pow(problem::p_ref, 1.0_rt/eos_rp::eos_gamma); // Define c_0 - problem::c_0 = std::sqrt(eos_gamma * problem::p_ref / problem::rho_0); + problem::c_0 = std::sqrt(eos_rp::eos_gamma * problem::p_ref / problem::rho_0); // Define r_c, radius of each vortex diff --git a/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize_state_data.H b/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize_state_data.H index 4ad9d2f6d6..19ffcbe555 100644 --- a/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize_state_data.H +++ b/Exec/hydro_tests/Vortices_LWAcoustics/problem_initialize_state_data.H @@ -60,7 +60,7 @@ void problem_initialize_state_data (int i, int j, int k, // internal energy - state(i,j,k,UEINT) = problem::p_ref / (eos_gamma - 1.0_rt); + state(i,j,k,UEINT) = problem::p_ref / (eos_rp::eos_gamma - 1.0_rt); // Total energy diff --git a/Exec/hydro_tests/acoustic_pulse/problem_initialize_state_data.H b/Exec/hydro_tests/acoustic_pulse/problem_initialize_state_data.H index 6ea18ace62..d63c4a8ffc 100644 --- a/Exec/hydro_tests/acoustic_pulse/problem_initialize_state_data.H +++ b/Exec/hydro_tests/acoustic_pulse/problem_initialize_state_data.H @@ -60,8 +60,8 @@ void problem_initialize_state_data (int i, int j, int k, state(i,j,k,UMZ) = 
0.0_rt; // we are isentropic, so p = (dens/rho0)**Gamma_1 - Real p = std::pow(state(i,j,k,URHO) / problem::rho0, eos_gamma); - Real eint = p / (eos_gamma - 1.0_rt); + Real p = std::pow(state(i,j,k,URHO) / problem::rho0, eos_rp::eos_gamma); + Real eint = p / (eos_rp::eos_gamma - 1.0_rt); state(i,j,k,UEDEN) = eint; state(i,j,k,UEINT) = eint; diff --git a/Exec/hydro_tests/gamma_law_bubble/Problem_Derive.cpp b/Exec/hydro_tests/gamma_law_bubble/Problem_Derive.cpp index 5c82caa1a8..128640929d 100644 --- a/Exec/hydro_tests/gamma_law_bubble/Problem_Derive.cpp +++ b/Exec/hydro_tests/gamma_law_bubble/Problem_Derive.cpp @@ -177,8 +177,8 @@ void ca_derrhopert(const Box& bx, FArrayBox& derfab, int dcomp, int /*ncomp*/, if (problem::do_isentropic) { Real z = static_cast(j) * dx[AMREX_SPACEDIM-1]; density[j] = problem::dens_base * - std::pow((gravity::const_grav * problem::dens_base * (eos_gamma - 1.0_rt) * z/ - (eos_gamma * problem::pres_base) + 1.0_rt), 1.0_rt/(eos_gamma - 1.0_rt)); + std::pow((gravity::const_grav * problem::dens_base * (eos_rp::eos_gamma - 1.0_rt) * z/ + (eos_rp::eos_gamma * problem::pres_base) + 1.0_rt), 1.0_rt/(eos_rp::eos_gamma - 1.0_rt)); } else { Real z = (static_cast(j) + 0.5_rt) * dx[AMREX_SPACEDIM-1]; density[j] = problem::dens_base * std::exp(-z/H); diff --git a/Exec/hydro_tests/gamma_law_bubble/initial_model.H b/Exec/hydro_tests/gamma_law_bubble/initial_model.H index 2ab295d96b..1335d8414d 100644 --- a/Exec/hydro_tests/gamma_law_bubble/initial_model.H +++ b/Exec/hydro_tests/gamma_law_bubble/initial_model.H @@ -66,8 +66,8 @@ generate_initial_model(const int npts_model, const Real xmin, const Real xmax, density = model_params.dens_base * std::pow(gravity::const_grav * model_params.dens_base * - (eos_gamma - 1.0) * z / - (eos_gamma * model_params.p_base) + 1.0_rt, 1.0_rt/(eos_gamma - 1.0_rt)); + (eos_rp::eos_gamma - 1.0) * z / + (eos_rp::eos_gamma * model_params.p_base) + 1.0_rt, 1.0_rt/(eos_rp::eos_gamma - 1.0_rt)); } else { Real z = xmin + 
(static_cast(i) + 0.5_rt) * dx; diff --git a/Exec/hydro_tests/gamma_law_bubble/prob_util.H b/Exec/hydro_tests/gamma_law_bubble/prob_util.H index 96c0ebd21c..b67f2182c5 100644 --- a/Exec/hydro_tests/gamma_law_bubble/prob_util.H +++ b/Exec/hydro_tests/gamma_law_bubble/prob_util.H @@ -29,8 +29,8 @@ gamma_law_initial_model(Real* const pressure, Real* const density, Real* const t if (problem::do_isentropic) { Real z = static_cast(j) * dx[AMREX_SPACEDIM-1]; density[j] = problem::dens_base * - std::pow((gravity::const_grav * problem::dens_base * (eos_gamma - 1.0_rt) * z/ - (eos_gamma * problem::pres_base) + 1.0_rt), 1.0_rt/(eos_gamma - 1.0_rt)); + std::pow((gravity::const_grav * problem::dens_base * (eos_rp::eos_gamma - 1.0_rt) * z/ + (eos_rp::eos_gamma * problem::pres_base) + 1.0_rt), 1.0_rt/(eos_rp::eos_gamma - 1.0_rt)); } else { Real z = (static_cast(j) + 0.5_rt) * dx[AMREX_SPACEDIM-1]; density[j] = problem::dens_base * std::exp(-z/H); diff --git a/Exec/hydro_tests/riemann_2d/problem_initialize_state_data.H b/Exec/hydro_tests/riemann_2d/problem_initialize_state_data.H index 95cc0b33f4..7f33ea2373 100644 --- a/Exec/hydro_tests/riemann_2d/problem_initialize_state_data.H +++ b/Exec/hydro_tests/riemann_2d/problem_initialize_state_data.H @@ -54,8 +54,8 @@ void problem_initialize_state_data(int i, int j, int k, state(i, j, k, UMX) = state(i, j, k, URHO) * problem::ux_3; state(i, j, k, UMY) = state(i, j, k, URHO) * problem::uy_3; - state(i, j, k, UEINT) = problem::p_3 / (eos_gamma - 1.0_rt); - state(i, j, k, UEDEN) = problem::p_3 / (eos_gamma - 1.0_rt) + state(i, j, k, UEINT) = problem::p_3 / (eos_rp::eos_gamma - 1.0_rt); + state(i, j, k, UEDEN) = problem::p_3 / (eos_rp::eos_gamma - 1.0_rt) + 0.5 * problem::rho_3 * problem::ux_3 * problem::ux_3 + 0.5 * problem::rho_3 * problem::uy_3 * problem::uy_3; @@ -66,9 +66,9 @@ void problem_initialize_state_data(int i, int j, int k, state(i, j, k, UMX) = state(i, j, k, URHO) * problem::ux_4; state(i, j, k, UMY) = state(i, j, k, URHO) * 
problem::uy_4; - state(i, j, k, UEINT) = problem::p_4 / (eos_gamma - 1.0_rt); + state(i, j, k, UEINT) = problem::p_4 / (eos_rp::eos_gamma - 1.0_rt); - state(i, j, k, UEDEN) = problem::p_4 / (eos_gamma - 1.0_rt) + state(i, j, k, UEDEN) = problem::p_4 / (eos_rp::eos_gamma - 1.0_rt) + 0.5 * problem::rho_4 * problem::ux_4 * problem::ux_4 + 0.5 * problem::rho_4 * problem::uy_4 * problem::uy_4; @@ -79,9 +79,9 @@ void problem_initialize_state_data(int i, int j, int k, state(i, j, k, UMX) = state(i, j, k, URHO) * problem::ux_2; state(i, j, k, UMY) = state(i, j, k, URHO) * problem::uy_2; - state(i, j, k, UEINT) = problem::p_2 / (eos_gamma - 1.0_rt); + state(i, j, k, UEINT) = problem::p_2 / (eos_rp::eos_gamma - 1.0_rt); - state(i, j, k, UEDEN) = problem::p_2 / (eos_gamma - 1.0_rt) + state(i, j, k, UEDEN) = problem::p_2 / (eos_rp::eos_gamma - 1.0_rt) + 0.5 * problem::rho_2 * problem::ux_2 * problem::ux_2 + 0.5 * problem::rho_2 * problem::uy_2 * problem::uy_2; } else if (xx > problem::center[0] & yy > problem::center[1]) @@ -91,9 +91,9 @@ void problem_initialize_state_data(int i, int j, int k, state(i, j, k, UMX) = state(i, j, k, URHO) * problem::ux_1; state(i, j, k, UMY) = state(i, j, k, URHO) * problem::uy_1; - state(i, j, k, UEINT) = problem::p_1 / (eos_gamma - 1.0_rt); + state(i, j, k, UEINT) = problem::p_1 / (eos_rp::eos_gamma - 1.0_rt); - state(i, j, k, UEDEN) = problem::p_1 / (eos_gamma - 1.0_rt) + state(i, j, k, UEDEN) = problem::p_1 / (eos_rp::eos_gamma - 1.0_rt) + 0.5 * problem::rho_1 * problem::ux_1 * problem::ux_1 + 0.5 * problem::rho_1 * problem::uy_1 * problem::uy_1; } diff --git a/Exec/mhd_tests/RT/problem_initialize_state_data.H b/Exec/mhd_tests/RT/problem_initialize_state_data.H index bb0aed8046..ee35b672b8 100644 --- a/Exec/mhd_tests/RT/problem_initialize_state_data.H +++ b/Exec/mhd_tests/RT/problem_initialize_state_data.H @@ -29,12 +29,12 @@ void problem_initialize_state_data (int i, int j, int k, Real pres; if (r[AMREX_SPACEDIM-1] < 
problem::split[AMREX_SPACEDIM-1]) { pres = problem::p0_base - problem::rho_1 * r[AMREX_SPACEDIM-1]; - state(i,j,k,UEDEN) = pres / (eos_gamma - 1.0_rt); - state(i,j,k,UEINT) = pres / (eos_gamma - 1.0_rt); + state(i,j,k,UEDEN) = pres / (eos_rp::eos_gamma - 1.0_rt); + state(i,j,k,UEINT) = pres / (eos_rp::eos_gamma - 1.0_rt); } else { pres = presmid - problem::rho_2 * (r[AMREX_SPACEDIM-1] - problem::split[AMREX_SPACEDIM-1]); - state(i,j,k,UEDEN) = pres / (eos_gamma - 1.0_rt); - state(i,j,k,UEINT) = pres / (eos_gamma - 1.0_rt); + state(i,j,k,UEDEN) = pres / (eos_rp::eos_gamma - 1.0_rt); + state(i,j,k,UEINT) = pres / (eos_rp::eos_gamma - 1.0_rt); } // doing it similar to 2d, will be something in x-z though @@ -61,4 +61,3 @@ void problem_initialize_state_data (int i, int j, int k, } #endif - diff --git a/Exec/science/Detonation/ci-benchmarks/sdc_det_plt00040_extrema.out b/Exec/science/Detonation/ci-benchmarks/sdc_det_plt00040_extrema.out index 51ab093e30..b072c6e901 100644 --- a/Exec/science/Detonation/ci-benchmarks/sdc_det_plt00040_extrema.out +++ b/Exec/science/Detonation/ci-benchmarks/sdc_det_plt00040_extrema.out @@ -1,79 +1,79 @@ plotfile = det_x_plt00040 time = 5.1558159140336702e-06 variables minimum value maximum value - density 185259874.21 216582994.41 - xmom -87234240329 2.9512596512e+16 + density 185257544.55 216468146.43 + xmom -37913405153 2.9568274388e+16 ymom 0 0 zmom 0 0 - rho_E 1.3062473821e+26 2.7893784326e+26 - rho_e 1.3062473821e+26 2.7751188263e+26 - Temp 50000000.026 7845854662.6 - rho_H1 2.1207307865e-22 0.020000135717 - rho_He3 0.0017225013714 0.02102428438 - rho_He4 94358881.486 200001799.59 - rho_C12 0.020000000216 21276964.112 - rho_N14 1.9999983474e-22 197.76685905 - rho_O16 0.02 19209.489937 - rho_Ne20 0.02 4937.0489429 - rho_Mg24 0.02 23322.104606 - rho_Si28 0.02 2015343.4134 - rho_S32 0.02 1656264.8265 - rho_Ar36 0.02 821829.69632 - rho_Ca40 0.02 725030.13946 - rho_Ti44 0.02 34172.699418 - rho_Cr48 0.02 78131.560865 - rho_Fe52 0.02 
277298.50875 - rho_Fe54 0.02 94994585.838 - rho_Ni56 0.02 2238978.4894 - rho_n 2.1207307865e-22 234580.14761 - rho_p 0.019999995438 3609721.5282 - rho_enuc -4.6900452682e+29 3.5791669148e+32 - pressure 5.5236728651e+25 1.1610544319e+26 - kineng 0 2.0455630311e+24 - soundspeed 612864631.21 895226067.36 - Gamma_1 1.3599756137 1.3820271358 - MachNumber 0 0.16114390973 - uplusc 612864631.21 999569362.18 - uminusc -895226468.14 -612860078.6 - entropy 98214767.758 336273451.62 + rho_E 1.3062473821e+26 2.7893330499e+26 + rho_e 1.3062473821e+26 2.77508064e+26 + Temp 50000000.026 7845891849.2 + rho_H1 2.1211722106e-22 0.020000096415 + rho_He3 0.0017224293869 0.021023770747 + rho_He4 94366787.107 200001406.23 + rho_C12 0.020000000216 21736627.589 + rho_N14 1.9999999481e-22 0.020000141427 + rho_O16 0.02 21564.50879 + rho_Ne20 0.02 4189.5544151 + rho_Mg24 0.02 23323.799421 + rho_Si28 0.02 2015729.7513 + rho_S32 0.02 1656696.6915 + rho_Ar36 0.02 822096.59228 + rho_Ca40 0.02 724138.29448 + rho_Ti44 0.02 34187.547129 + rho_Cr48 0.02 78085.100557 + rho_Fe52 0.02 278150.27827 + rho_Fe54 0.02 94990080.243 + rho_Ni56 0.02 2242328.7066 + rho_n 2.1211722106e-22 234640.58163 + rho_p 0.019999995438 3609403.7361 + rho_enuc -4.7086157677e+29 3.5777644631e+32 + pressure 5.5236728651e+25 1.1610599873e+26 + kineng 0 2.0519264828e+24 + soundspeed 612864631.21 895228345.47 + Gamma_1 1.3599756425 1.3822442674 + MachNumber 0 0.16119509216 + uplusc 612864631.21 999919125.29 + uminusc -895228523.09 -612861073.53 + entropy 98214767.758 336273484.2 magvort 0 0 - divu -97746.019905 33801.828895 - eint_E 6.5312369103e+17 1.3804343879e+18 - eint_e 6.5312369103e+17 1.3804343879e+18 - logden 8.2677813648 8.3356243538 - StateErr_0 185259874.21 216582994.41 - StateErr_1 50000000.026 7845854662.6 + divu -97996.218171 33763.099005 + eint_E 6.5312369103e+17 1.3804410664e+18 + eint_e 6.5312369103e+17 1.3804410664e+18 + logden 8.2677759035 8.3353939984 + StateErr_0 185257544.55 216468146.43 + StateErr_1 
50000000.026 7845891849.2 StateErr_2 1e-30 9.9999779324e-11 X(H1) 1e-30 9.9999779324e-11 - X(He3) 8.9857380636e-12 9.9999601254e-11 - X(He4) 0.48403393423 0.9999999982 - X(C12) 1.0000000108e-10 0.10032845398 - X(N14) 1e-30 9.3254108586e-07 - X(O16) 1e-10 9.604742181e-05 - X(Ne20) 1e-10 2.3279941869e-05 - X(Mg24) 1e-10 0.00011116921103 - X(Si28) 1e-10 0.01005607083 - X(S32) 1e-10 0.0083471961707 - X(Ar36) 1e-10 0.0041657025725 - X(Ca40) 1e-10 0.0037098568497 - X(Ti44) 1e-10 0.00017298041086 - X(Cr48) 1e-10 0.00040079244474 - X(Fe52) 1e-10 0.0013382570124 - X(Fe54) 1e-10 0.46490351089 - X(Ni56) 1e-10 0.010337739099 - X(n) 1e-30 0.0011729003976 - X(p) 9.9999977071e-11 0.017596365447 - abar 4.000000001 6.7314014168 - Ye 0.49998670317 0.50001557187 - x_velocity -436.17123215 138623047.3 + X(He3) 8.985892423e-12 9.9999601254e-11 + X(He4) 0.48406961191 0.9999999982 + X(C12) 1.0000000108e-10 0.10247460098 + X(N14) 1e-30 1.0000000384e-10 + X(O16) 1e-10 0.00010166316852 + X(Ne20) 1e-10 1.9751128146e-05 + X(Mg24) 1e-10 0.00011117683475 + X(Si28) 1e-10 0.010049207028 + X(S32) 1e-10 0.0083399755634 + X(Ar36) 1e-10 0.0041613939849 + X(Ca40) 1e-10 0.003708082998 + X(Ti44) 1e-10 0.00017289116777 + X(Cr48) 1e-10 0.00040055008209 + X(Fe52) 1e-10 0.0013373223366 + X(Fe54) 1e-10 0.46488586209 + X(Ni56) 1e-10 0.010358700546 + X(n) 1e-30 0.0011732028047 + X(p) 9.9999977002e-11 0.017592025438 + abar 4.000000001 6.7312151705 + Ye 0.49998627651 0.50001570244 + x_velocity -189.56702545 138792440.56 y_velocity 0 0 z_velocity 0 0 - t_sound_t_enuc 3.4412406685e-13 0.97531384976 - enuc -2.4936491578e+21 1.6525613771e+24 - magvel 0 138623047.3 - radvel -436.17123215 138623047.3 + t_sound_t_enuc 3.4412406685e-13 0.97569821534 + enuc -2.5034688832e+21 1.6527902706e+24 + magvel 0 138792440.56 + radvel -189.56702545 138792440.56 circvel 0 2 - magmom 0 2.9512596512e+16 + magmom 0 2.9568274388e+16 angular_momentum_x 0 0 angular_momentum_y 0 0 angular_momentum_z 0 0 diff --git 
a/Exec/science/flame_wave/inputs_H_He/inputs.H_He.nonsquare.static.1000Hz.pslope.cool b/Exec/science/flame_wave/inputs_H_He/inputs.H_He.nonsquare.static.1000Hz.pslope.cool new file mode 100644 index 0000000000..5f440f7d36 --- /dev/null +++ b/Exec/science/flame_wave/inputs_H_He/inputs.H_He.nonsquare.static.1000Hz.pslope.cool @@ -0,0 +1,168 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 250000000 +stop_time = 3.0 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 +geometry.coord_sys = 1 # 0 => cart, 1 => RZ 2=>spherical +geometry.prob_lo = 0 0 +geometry.prob_hi = 9.8304e4 2.4576e4 +amr.n_cell = 384 192 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 3 3 +castro.hi_bc = 2 2 + +castro.fill_ambient_bc = 1 +castro.ambient_fill_dir = 1 +castro.ambient_outflow_vel = 1 + +castro.allow_non_unit_aspect_zones = 1 + +castro.domain_is_plane_parallel = 1 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 1 +castro.do_rotation = 1 +castro.do_grav = 1 +castro.do_sponge = 1 + +castro.small_temp = 1.e6 +castro.small_dens = 1.e-5 + +castro.ppm_type = 1 +castro.grav_source_type = 2 +castro.use_pslope = 1 +castro.pslope_cutoff_density = 1.e4 + +gravity.gravity_type = ConstantGrav +gravity.const_grav = -1.5e14 + +castro.rotational_period = 0.001 +castro.rotation_include_centrifugal = 0 + +castro.diffuse_temp = 1 +castro.diffuse_cutoff_density_hi = 5.e4 +castro.diffuse_cutoff_density = 2.e4 + +castro.diffuse_cond_scale_fac = 1.0 + +castro.react_rho_min = 1.e2 +castro.react_rho_max = 5.e6 + +castro.react_T_min = 6.e7 + +castro.sponge_upper_density = 1.e2 +castro.sponge_lower_density = 1.e0 +castro.sponge_timescale = 1.e-7 + +# GPU options +castro.hydro_memory_footprint_ratio = 3 + +# TIME STEP CONTROL +castro.cfl = 0.8 # cfl number for hyperbolic system +castro.init_shrink = 0.1 # scale back initial 
timestep +castro.change_max = 1.1 # max time step growth + +castro.use_retry = 1 +castro.max_subcycles = 16 + +castro.retry_small_density_cutoff = 10.0 + +castro.abundance_failure_tolerance = 0.1 +castro.abundance_failure_rho_cutoff = 1.0 + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 100 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +amr.run_log = run_log +amr.run_log_terse = run_log_terse + +# REFINEMENT / REGRIDDING +amr.max_level = 2 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 # refinement ratio +amr.regrid_int = 0 # static grids ftw +amr.blocking_factor = 32 # block factor in grid generation +amr.max_grid_size = 128 +amr.n_error_buf = 2 2 2 2 # number of buffer cells in error est + +# CHECKPOINT FILES +amr.check_file = flame_wave_chk # root name of checkpoint file +amr.check_int = 1000 # number of timesteps between checkpoints + +amr.checkpoint_files_output = 1 + +# PLOTFILES +amr.plot_file = flame_wave_H_He_plt # root name of plotfile +amr.plot_per = 2.e-3 # number of seconds between plotfiles +amr.derive_plot_vars = ALL +amr.plot_files_output = 1 + +amr.small_plot_file = flame_wave_H_He_smallplt # root name of plotfile +amr.small_plot_per = 2.e-4 # number of seconds between plotfiles +amr.small_plot_vars = density Temp +amr.derive_small_plot_vars = abar x_velocity y_velocity z_velocity X(H1) X(He4) X(ash) enuc + +# write plotfiles in single-precision +fab.format = NATIVE_32 + +amr.file_name_digits = 7 # pad step number with zeros if needed + +# don't write plotfiles when a stop is requested with dump_and_stop +amr.write_plotfile_with_checkpoint = 0 +castro.output_at_completion = 0 + +# problem initialization + +problem.dtemp = 1.2e9 # added to T_hi in the hot region +problem.x_half_max = 2.56e4 +problem.x_half_width = 2048.e0 + +problem.dens_base = 3.43e6 + +problem.T_star = 2.e8 +problem.T_hi = 2.e8 +problem.T_lo = 8.e6 + +problem.H_star = 2000.e0 +problem.atm_delta = 50.0 + +# 
helper values for initial composition +other_frac = 0.01 +h_frac = 0.10 + +problem.fuel1_name = "hydrogen-1" +problem.fuel1_frac = h_frac + +problem.fuel2_name = "helium-4" +problem.fuel2_frac = "1.0 - h_frac - other_frac" + +problem.fuel3_name = "oxygen-14" +problem.fuel3_frac = "0.35 * other_frac" + +problem.fuel4_name = "oxygen-15" +problem.fuel4_frac = "0.65 * other_frac" + +problem.ash1_name = "nickel-56" +problem.ash1_frac = 1.0 + +problem.low_density_cutoff = 1.e-4 + +problem.tag_by_density = 0 +problem.refine_height = 3600.0 +problem.max_base_tagging_level = 3 + +# Microphysics + +integrator.rtol_spec = 1.e-5 +integrator.atol_spec = 1.e-5 + +integrator.use_burn_retry = 1 + +network.use_tables = 1 diff --git a/Exec/science/subch_planar/Problem_Derive.cpp b/Exec/science/subch_planar/Problem_Derive.cpp index 2e97187e44..3275166238 100644 --- a/Exec/science/subch_planar/Problem_Derive.cpp +++ b/Exec/science/subch_planar/Problem_Derive.cpp @@ -273,12 +273,15 @@ void ca_dergradpoverp1(const Box& bx, FArrayBox& derfab, int /*dcomp*/, int /*nc Real vm = dat(i,j-1,k,UMY) / dat(i,j-1,k,URHO); Real v0 = dat(i,j,k,UMY) / dat(i,j,k,URHO); + Real du_x{}; + Real dv_y{}; + // construct div{U} if (coord_type == 0) { // Cartesian - div_u += 0.5_rt * (up - um) * dxinv; - div_u += 0.5_rt * (vp - vm) * dyinv; + du_x = 0.5_rt * (up - um) * dxinv; + dv_y = 0.5_rt * (vp - vm) * dyinv; } else if (coord_type == 1) { @@ -287,8 +290,8 @@ void ca_dergradpoverp1(const Box& bx, FArrayBox& derfab, int /*dcomp*/, int /*nc Real rm = (i - 1 + 0.5_rt) * dx[0]; Real rp = (i + 1 + 0.5_rt) * dx[0]; - div_u += 0.5_rt * (rp * up - rm * um) / (rc * dx[0]) + - 0.5_rt * (vp - vm) * dyinv; + du_x = 0.5_rt * (rp * up - rm * um) / (rc * dx[0]); + dv_y = 0.5_rt * (vp - vm) * dyinv; #ifndef AMREX_USE_GPU } else { @@ -296,6 +299,8 @@ void ca_dergradpoverp1(const Box& bx, FArrayBox& derfab, int /*dcomp*/, int /*nc #endif } + div_u = du_x + dv_y; + // we need to compute p in the full stencil Real p_ip1{}; @@ 
-399,12 +404,12 @@ void ca_dergradpoverp1(const Box& bx, FArrayBox& derfab, int /*dcomp*/, int /*nc Real dP_y = 0.5_rt * (p_jp1 - p_jm1); //Real gradPdx_over_P = std::sqrt(dP_x * dP_x + dP_y * dP_y + dP_z * dP_z) / dat(i,j,k,QPRES); - Real du_x = std::min(up - um, 0.0); - Real dv_y = std::min(vp - vm, 0.0); + Real cdu_x = std::min(du_x, 0.0); + Real cdv_y = std::min(dv_y, 0.0); - Real divu_mag = std::sqrt(du_x * du_x + dv_y * dv_y + 1.e-30); + Real divu_mag = std::sqrt(cdu_x * cdu_x + cdv_y * cdv_y + 1.e-30); - Real gradPdx_over_P = std::abs(dP_x * du_x + dP_y * dv_y) / divu_mag; + Real gradPdx_over_P = std::abs(dP_x * cdu_x + dP_y * cdv_y) / divu_mag; gradPdx_over_P /= p_zone; der(i,j,k,0) = gradPdx_over_P; diff --git a/Exec/science/wdmerger/GNUmakefile b/Exec/science/wdmerger/GNUmakefile index 45866a9d03..bd78251741 100644 --- a/Exec/science/wdmerger/GNUmakefile +++ b/Exec/science/wdmerger/GNUmakefile @@ -20,10 +20,10 @@ NUM_MODELS = 2 # define the location of the CASTRO top directory CASTRO_HOME ?= ../../.. -# Turn on hybrid momentum, but note that it only is used in 3D at present. +# Turn on hybrid momentum? but note that it only is used in 3D at present. ifeq ($(DIM), 3) - USE_HYBRID_MOMENTUM ?= TRUE + USE_HYBRID_MOMENTUM ?= FALSE endif # Set the default integrator for the burner. 
diff --git a/Exec/science/wdmerger/ci-benchmarks/wdmerger_collision_2D.out b/Exec/science/wdmerger/ci-benchmarks/wdmerger_collision_2D.out index 6d11ada2e9..88fbaf4f88 100644 --- a/Exec/science/wdmerger/ci-benchmarks/wdmerger_collision_2D.out +++ b/Exec/science/wdmerger/ci-benchmarks/wdmerger_collision_2D.out @@ -5,16 +5,16 @@ xmom -5.4964100651e+14 1.3559128302e+14 ymom -2.5530096328e+15 2.5530122744e+15 zmom 0 0 - rho_E 7.4982062146e+11 5.0669247218e+24 - rho_e 7.1077581849e+11 5.0640768325e+24 - Temp 242288.68588 1409652233.5 - rho_He4 8.693611703e-17 3.5999032981 + rho_E 7.4982062146e+11 5.0669247219e+24 + rho_e 7.1077581849e+11 5.0640768326e+24 + Temp 242288.68588 1409652233.6 + rho_He4 8.693611703e-17 3.599903302 rho_C12 3.4774446812e-05 7825956.6934 rho_O16 5.2161670217e-05 11739149.75 - rho_Ne20 8.693611703e-17 181951.05664 - rho_Mg24 8.693611703e-17 1192.7969626 - rho_Si28 8.693611703e-17 6.6913702458 - rho_S32 8.693611703e-17 0.00019493291444 + rho_Ne20 8.693611703e-17 181951.0571 + rho_Mg24 8.693611703e-17 1192.7969729 + rho_Si28 8.693611703e-17 6.6913702949 + rho_S32 8.693611703e-17 0.00019493291655 rho_Ar36 8.693611703e-17 1.9565534609e-05 rho_Ca40 8.693611703e-17 1.9565534331e-05 rho_Ti44 8.693611703e-17 1.9565534308e-05 @@ -25,5 +25,5 @@ grav_x -684991644 -51428.243166 grav_y -739606241.84 739606820.44 grav_z 0 0 - rho_enuc -1.9506340641e+12 7.6429034765e+23 + rho_enuc -2.7633982574e+12 7.6429034885e+23 diff --git a/Source/diffusion/Castro_diffusion.cpp b/Source/diffusion/Castro_diffusion.cpp index c02fc3f611..0a2a98fbce 100644 --- a/Source/diffusion/Castro_diffusion.cpp +++ b/Source/diffusion/Castro_diffusion.cpp @@ -22,15 +22,16 @@ Castro::construct_old_diff_source(MultiFab& source, MultiFab& state_in, Real tim if (verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; - + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = 
ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_diff_source() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_diff_source() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -59,15 +60,17 @@ Castro::construct_new_diff_source(MultiFab& source, MultiFab& state_old, MultiFa if (verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_new_diff_source() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_diff_source() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/driver/Castro.H b/Source/driver/Castro.H index 8c7b9c13f8..a3c6983c4a 100644 --- a/Source/driver/Castro.H +++ b/Source/driver/Castro.H @@ -1341,7 +1341,7 @@ protected: /// /// Flag for indicating that we want to save prev_state until the reflux. 
/// - bool keep_prev_state; + bool keep_prev_state{}; #ifdef TRUE_SDC @@ -1418,8 +1418,8 @@ protected: /// -/// for keeping track of the amount of CPU time used -- this will persist -/// after restarts +/// for keeping track of the amount of CPU or GPU time used -- this will persist +/// after restarts /// static amrex::Real previousCPUTimeUsed; static amrex::Real startCPUTime; diff --git a/Source/driver/Castro.cpp b/Source/driver/Castro.cpp index 3108b7794f..0600a16d1c 100644 --- a/Source/driver/Castro.cpp +++ b/Source/driver/Castro.cpp @@ -566,7 +566,7 @@ Castro::read_params () // in Amr::InitAmr(), right before the ParmParse checks, so if the user opts to // override our overriding, they can do so. - Amr::setComputeNewDtOnRegrid(1); + Amr::setComputeNewDtOnRegrid(true); // Read in custom refinement scheme. @@ -3048,14 +3048,16 @@ Castro::reflux (int crse_level, int fine_level, bool in_post_timestep) if (verbose) { const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real end = ParallelDescriptor::second() - strt; + amrex::Real end = ParallelDescriptor::second() - strt; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(end,IOProc); if (ParallelDescriptor::IOProcessor()) { - std::cout << "Castro::reflux() at level " << level << " : time = " << end << std::endl; + std::cout << "Castro::reflux() at level " << llevel + << " : time = " << end << std::endl; } #ifdef BL_LAZY }); @@ -3123,6 +3125,9 @@ Castro::normalize_species (MultiFab& S_new, int ng) X > 1.0_rt + castro::abundance_failure_tolerance) { #ifndef AMREX_USE_GPU std::cout << "(i, j, k) = " << i << " " << j << " " << k << " " << ", X[" << n << "] = " << X << " (density here is: " << u(i,j,k,URHO) << ")" << std::endl; +#elif defined(ALLOW_GPU_PRINTF) + AMREX_DEVICE_PRINTF("(i, j, k) = %d %d %d, X[%d] = %g (density here is: %g)\n", + i, j, k, n, X, u(i,j,k,URHO)); #endif } } diff --git a/Source/driver/Castro_io.cpp 
b/Source/driver/Castro_io.cpp index b0c58b5da2..93ea8d7ab6 100644 --- a/Source/driver/Castro_io.cpp +++ b/Source/driver/Castro_io.cpp @@ -564,8 +564,13 @@ Castro::writeJobInfo (const std::string& dir, const Real io_time) jobInfoFile << "hydro tile size: " << hydro_tile_size << "\n"; jobInfoFile << "\n"; +#ifdef AMREX_USE_GPU + jobInfoFile << "GPU time used since start of simulation (GPU-hours): " << + getCPUTime()/3600.0; +#else jobInfoFile << "CPU time used since start of simulation (CPU-hours): " << getCPUTime()/3600.0; +#endif jobInfoFile << "\n\n"; diff --git a/Source/driver/_cpp_parameters b/Source/driver/_cpp_parameters index f5cdc29bd3..44f78e6ebf 100644 --- a/Source/driver/_cpp_parameters +++ b/Source/driver/_cpp_parameters @@ -29,7 +29,7 @@ update_sources_after_reflux bool 1 # Castro was originally written assuming dx = dy = dz. This assumption is # enforced at runtime. Setting allow_non_unit_aspect_zones = 1 opts out. -allow_non_unit_aspect_zones int 0 +allow_non_unit_aspect_zones bool 0 #----------------------------------------------------------------------------- diff --git a/Source/gravity/Castro_gravity.cpp b/Source/gravity/Castro_gravity.cpp index bf9dc4f6b7..395e0e675e 100644 --- a/Source/gravity/Castro_gravity.cpp +++ b/Source/gravity/Castro_gravity.cpp @@ -122,15 +122,17 @@ Castro::construct_old_gravity (Real time) if (verbose > 0) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_gravity() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_gravity() time = " << run_time << " on level " + << llevel << "\n" << 
"\n"; #ifdef BL_LAZY }); #endif @@ -278,15 +280,17 @@ Castro::construct_new_gravity (Real time) if (verbose > 0) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_new_gravity() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_gravity() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -431,14 +435,16 @@ void Castro::construct_old_gravity_source(MultiFab& source, MultiFab& state_in, if (castro::verbose > 1) { const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_gravity_source() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_gravity_source() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -667,15 +673,17 @@ void Castro::construct_new_gravity_source(MultiFab& source, MultiFab& state_old, if (castro::verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif 
ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_new_gravity_source() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_gravity_source() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/gravity/Gravity.cpp b/Source/gravity/Gravity.cpp index 72a2565935..18eebefbfc 100644 --- a/Source/gravity/Gravity.cpp +++ b/Source/gravity/Gravity.cpp @@ -457,13 +457,15 @@ Gravity::solve_for_phi (int level, if (gravity::verbose) { const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real end = ParallelDescriptor::second() - strt; + amrex::Real end = ParallelDescriptor::second() - strt; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(end,IOProc); - amrex::Print() << "Gravity::solve_for_phi() time = " << end << " on level " << level << std::endl << std::endl; + amrex::Print() << "Gravity::solve_for_phi() time = " << end << " on level " + << llevel << std::endl << std::endl; #ifdef BL_LAZY }); #endif diff --git a/Source/hydro/Castro_ctu_hydro.cpp b/Source/hydro/Castro_ctu_hydro.cpp index 68c93bf857..d443bbe060 100644 --- a/Source/hydro/Castro_ctu_hydro.cpp +++ b/Source/hydro/Castro_ctu_hydro.cpp @@ -1419,12 +1419,13 @@ Castro::construct_ctu_hydro_source(Real time, Real dt) // NOLINT(readability-co #ifdef RADIATION if (radiation->verbose>=1) { + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceIntMax(nstep_fsp, ParallelDescriptor::IOProcessorNumber()); if (ParallelDescriptor::IOProcessor() && nstep_fsp > 0) { - std::cout << "Radiation f-space advection on level " << level + std::cout << "Radiation f-space advection on level " << llevel << " takes as many as " << nstep_fsp; if (nstep_fsp == 1) { std::cout<< " substep.\n"; @@ -1482,15 +1483,17 @@ 
Castro::construct_ctu_hydro_source(Real time, Real dt) // NOLINT(readability-co if (verbose > 0) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_ctu_hydro_source() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_ctu_hydro_source() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/hydro/Castro_hybrid.cpp b/Source/hydro/Castro_hybrid.cpp index b1bdabc4f9..0191587b6f 100644 --- a/Source/hydro/Castro_hybrid.cpp +++ b/Source/hydro/Castro_hybrid.cpp @@ -22,8 +22,8 @@ Castro::construct_old_hybrid_source(MultiFab& source, MultiFab& state_old, Real if (verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { @@ -65,8 +65,8 @@ Castro::construct_new_hybrid_source(MultiFab& source, MultiFab& state_old, Multi if (verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { diff --git a/Source/hydro/Castro_mol_hydro.cpp b/Source/hydro/Castro_mol_hydro.cpp index 6eb352ac14..9fee21334d 100644 --- a/Source/hydro/Castro_mol_hydro.cpp +++ b/Source/hydro/Castro_mol_hydro.cpp @@ 
-743,8 +743,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, MultiFab& A_update) if (verbose > 0) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { diff --git a/Source/hydro/advection_util.cpp b/Source/hydro/advection_util.cpp index ec7505a438..095192931b 100644 --- a/Source/hydro/advection_util.cpp +++ b/Source/hydro/advection_util.cpp @@ -302,49 +302,52 @@ Castro::apply_av(const Box& bx, Real diff_coeff = difmag; - amrex::ParallelFor(bx, NUM_STATE, - [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept { - if (n == UTEMP) { - return; - } + Real div1; + if (idir == 0) { + div1 = 0.25_rt * (div(i,j,k) + div(i,j+dg1,k) + + div(i,j,k+dg2) + div(i,j+dg1,k+dg2)); + } else if (idir == 1) { + div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + + div(i,j,k+dg2) + div(i+1,j,k+dg2)); + } else { + div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + + div(i,j+dg1,k) + div(i+1,j+dg1,k)); + } + + div1 = diff_coeff * std::min(0.0_rt, div1); + + for (int n = 0; n < NUM_STATE; ++n) { + + if (n == UTEMP) { + continue; + } #ifdef SHOCK_VAR - if (n == USHK) { - return; - } + if (n == USHK) { + continue; + } #endif - #ifdef NSE_NET - if (n == UMUP || n == UMUN) { - return; - } + if (n == UMUP || n == UMUN) { + continue; + } #endif - Real div1; - if (idir == 0) { - div1 = 0.25_rt * (div(i,j,k) + div(i,j+dg1,k) + - div(i,j,k+dg2) + div(i,j+dg1,k+dg2)); - div1 = diff_coeff * amrex::min(0.0_rt, div1); - div1 = div1 * (uin(i,j,k,n) - uin(i-1,j,k,n)); - - } else if (idir == 1) { - - div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + - div(i,j,k+dg2) + div(i+1,j,k+dg2)); - div1 = diff_coeff * amrex::min(0.0_rt, div1); - div1 = div1 * (uin(i,j,k,n) - 
uin(i,j-dg1,k,n)); - - } else { + Real div_var{}; - div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + - div(i,j+dg1,k) + div(i+1,j+dg1,k)); - div1 = diff_coeff * amrex::min(0.0_rt, div1); - div1 = div1 * (uin(i,j,k,n) - uin(i,j,k-dg2,n)); + if (idir == 0) { + div_var = div1 * (uin(i,j,k,n) - uin(i-1,j,k,n)); + } else if (idir == 1) { + div_var = div1 * (uin(i,j,k,n) - uin(i,j-dg1,k,n)); + } else { + div_var = div1 * (uin(i,j,k,n) - uin(i,j,k-dg2,n)); + } - } - - flux(i,j,k,n) += dx[idir] * div1; + flux(i,j,k,n) += dx[idir] * div_var; + } }); } @@ -359,37 +362,42 @@ Castro::apply_av_rad(const Box& bx, const auto dx = geom.CellSizeArray(); - Real diff_coeff = difmag; - - amrex::ParallelFor(bx, Radiation::nGroups, - [=] AMREX_GPU_DEVICE (int i, int j, int k, int n) noexcept - { + amrex::Real diff_coeff = difmag; - Real div1; - if (idir == 0) { + int ngroups = Radiation::nGroups; - div1 = 0.25_rt * (div(i,j,k) + div(i,j+dg1,k) + - div(i,j,k+dg2) + div(i,j+dg1,k+dg2)); - div1 = diff_coeff * amrex::min(0.0_rt, div1); - div1 = div1 * (Erin(i,j,k,n) - Erin(i-1,j,k,n)); + amrex::ParallelFor(bx, + [=] AMREX_GPU_DEVICE (int i, int j, int k) noexcept + { - } else if (idir == 1) { + Real div1; + if (idir == 0) { + div1 = 0.25_rt * (div(i,j,k) + div(i,j+dg1,k) + + div(i,j,k+dg2) + div(i,j+dg1,k+dg2)); + } else if (idir == 1) { + div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + + div(i,j,k+dg2) + div(i+1,j,k+dg2)); + } else { + div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + + div(i,j+dg1,k) + div(i+1,j+dg1,k)); + } - div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + - div(i,j,k+dg2) + div(i+1,j,k+dg2)); - div1 = diff_coeff * amrex::min(0.0_rt, div1); - div1 = div1 * (Erin(i,j,k,n) - Erin(i,j-dg1,k,n)); + div1 = diff_coeff * std::min(0.0_rt, div1); - } else { + for (int n = 0; n < ngroups; ++n) { - div1 = 0.25_rt * (div(i,j,k) + div(i+1,j,k) + - div(i,j+dg1,k) + div(i+1,j+dg1,k)); - div1 = diff_coeff * amrex::min(0.0_rt, div1); - div1 = div1 * (Erin(i,j,k,n) - Erin(i,j,k-dg2,n)); + Real 
div_var{}; - } + if (idir == 0) { + div_var = div1 * (Erin(i,j,k,n) - Erin(i-1,j,k,n)); + } else if (idir == 1) { + div_var = div1 * (Erin(i,j,k,n) - Erin(i,j-dg1,k,n)); + } else { + div_var = div1 * (Erin(i,j,k,n) - Erin(i,j,k-dg2,n)); + } - radflux(i,j,k,n) += dx[idir] * div1; + radflux(i,j,k,n) += dx[idir] * div_var; + } }); } #endif diff --git a/Source/hydro/reconstruction.H b/Source/hydro/reconstruction.H index 65d6fc31dc..9a41710663 100644 --- a/Source/hydro/reconstruction.H +++ b/Source/hydro/reconstruction.H @@ -1,6 +1,8 @@ #ifndef CASTRO_RECONSTRUCTION_H #define CASTRO_RECONSTRUCTION_H +#include + namespace reconstruction { enum slope_indices { im2 = 0, @@ -14,9 +16,9 @@ namespace reconstruction { AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -load_stencil(Array4 const& q_arr, const int idir, +load_stencil(amrex::Array4 const& q_arr, const int idir, const int i, const int j, const int k, const int ncomp, - Real* s) { + amrex::Real* s) { using namespace reconstruction; @@ -47,9 +49,11 @@ load_stencil(Array4 const& q_arr, const int idir, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -load_passive_stencil(Array4 const& U_arr, Array4 const& rho_inv_arr, const int idir, +load_passive_stencil(amrex::Array4 const& U_arr, + amrex::Array4 const& rho_inv_arr, + const int idir, const int i, const int j, const int k, const int ncomp, - Real* s) { + amrex::Real* s) { using namespace reconstruction; @@ -80,13 +84,16 @@ load_passive_stencil(Array4 const& U_arr, Array4 const& AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -add_geometric_rho_source(Array4 const& q_arr, - Array4 const& dloga, +add_geometric_rho_source(amrex::Array4 const& q_arr, + amrex::Array4 const& dloga, const int i, const int j, const int k, - Real* s) { + amrex::Real* s) { using namespace reconstruction; + // this takes the form: -alpha rho u / r + // where alpha = 1 for cylindrical and 2 for spherical + // note: this is assumed to be working only in the x-direction s[im2] += -dloga(i-2,j,k) * 
q_arr(i-2,j,k,QRHO) * q_arr(i-2,j,k,QU); @@ -98,13 +105,16 @@ add_geometric_rho_source(Array4 const& q_arr, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -add_geometric_rhoe_source(Array4 const& q_arr, - Array4 const& dloga, +add_geometric_rhoe_source(amrex::Array4 const& q_arr, + amrex::Array4 const& dloga, const int i, const int j, const int k, - Real* s) { + amrex::Real* s) { using namespace reconstruction; + // this takes the form: -alpha (rho e + p) u / r + // where alpha = 1 for cylindrical and 2 for spherical + // note: this is assumed to be working only in the x-direction s[im2] += -dloga(i-2,j,k) * (q_arr(i-2,j,k,QREINT) + q_arr(i-2,j,k,QPRES)) * q_arr(i-2,j,k,QU); @@ -116,23 +126,25 @@ add_geometric_rhoe_source(Array4 const& q_arr, AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE void -add_geometric_p_source(Array4 const& q_arr, - Array4 const& qaux_arr, - Array4 const& dloga, +add_geometric_p_source(amrex::Array4 const& q_arr, + amrex::Array4 const& qaux_arr, + amrex::Array4 const& dloga, const int i, const int j, const int k, - Real* s) { + amrex::Real* s) { using namespace reconstruction; + // this takes the form: -alpha Gamma1 p u / r + // where alpha = 1 for cylindrical and 2 for spherical + // note: this is assumed to be working only in the x-direction - s[im2] += -dloga(i-2,j,k) * q_arr(i-2,j,k,QRHO) * qaux_arr(i-2,j,k,QC) * q_arr(i-2,j,k,QU); - s[im1] += -dloga(i-1,j,k) * q_arr(i-1,j,k,QRHO) * qaux_arr(i-1,j,k,QC) * q_arr(i-1,j,k,QU); - s[i0] += -dloga(i,j,k) * q_arr(i,j,k,QRHO) * qaux_arr(i,j,k,QC) * q_arr(i,j,k,QU); - s[ip1] += -dloga(i+1,j,k) * q_arr(i+1,j,k,QRHO) * qaux_arr(i+1,j,k,QC) * q_arr(i+1,j,k,QU); - s[ip2] += -dloga(i+2,j,k) * q_arr(i+2,j,k,QRHO) * qaux_arr(i+2,j,k,QC) * q_arr(i+2,j,k,QU); + s[im2] += -dloga(i-2,j,k) * q_arr(i-2,j,k,QPRES) * qaux_arr(i-2,j,k,QGAMC) * q_arr(i-2,j,k,QU); + s[im1] += -dloga(i-1,j,k) * q_arr(i-1,j,k,QPRES) * qaux_arr(i-1,j,k,QGAMC) * q_arr(i-1,j,k,QU); + s[i0] += -dloga(i,j,k) * q_arr(i,j,k,QPRES) * 
qaux_arr(i,j,k,QGAMC) * q_arr(i,j,k,QU); + s[ip1] += -dloga(i+1,j,k) * q_arr(i+1,j,k,QPRES) * qaux_arr(i+1,j,k,QGAMC) * q_arr(i+1,j,k,QU); + s[ip2] += -dloga(i+2,j,k) * q_arr(i+2,j,k,QPRES) * qaux_arr(i+2,j,k,QGAMC) * q_arr(i+2,j,k,QU); } #endif - diff --git a/Source/problems/Castro_problem_source.cpp b/Source/problems/Castro_problem_source.cpp index dc4d08f575..68a48c64df 100644 --- a/Source/problems/Castro_problem_source.cpp +++ b/Source/problems/Castro_problem_source.cpp @@ -24,8 +24,8 @@ Castro::construct_old_ext_source(MultiFab& source, MultiFab& state_in, Real time if (verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { @@ -95,8 +95,8 @@ Castro::construct_new_ext_source(MultiFab& source, MultiFab& state_old, MultiFab if (verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { diff --git a/Source/reactions/Castro_react.cpp b/Source/reactions/Castro_react.cpp index 7fc8e069b9..b7a422f1dd 100644 --- a/Source/reactions/Castro_react.cpp +++ b/Source/reactions/Castro_react.cpp @@ -417,6 +417,11 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra #if defined(AMREX_USE_HIP) Gpu::streamSynchronize(); // otherwise HIP may fail to allocate the necessary resources. 
#endif + +#ifdef ALLOW_GPU_PRINTF + std::fflush(nullptr); +#endif + } #if defined(AMREX_USE_GPU) @@ -444,14 +449,16 @@ Castro::react_state(MultiFab& s, MultiFab& r, Real time, Real dt, const int stra if (verbose > 0) { const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::react_state() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::react_state() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -812,6 +819,11 @@ Castro::react_state(Real time, Real dt) #if defined(AMREX_USE_HIP) Gpu::streamSynchronize(); // otherwise HIP may fail to allocate the necessary resources. #endif + +#ifdef ALLOW_GPU_PRINTF + std::fflush(nullptr); +#endif + } #if defined(AMREX_USE_GPU) @@ -842,15 +854,17 @@ Castro::react_state(Real time, Real dt) amrex::Print() << "... Leaving burner on level " << level << " after completing full timestep of burning." 
<< std::endl << std::endl; - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time, IOProc); - amrex::Print() << "Castro::react_state() time = " << run_time << " on level " << level << std::endl << std::endl; + amrex::Print() << "Castro::react_state() time = " << run_time + << " on level " << llevel << std::endl << std::endl; #ifdef BL_LAZY }); #endif diff --git a/Source/rotation/Castro_rotation.cpp b/Source/rotation/Castro_rotation.cpp index cfc6a2edc5..67cbf15384 100644 --- a/Source/rotation/Castro_rotation.cpp +++ b/Source/rotation/Castro_rotation.cpp @@ -34,14 +34,16 @@ Castro::construct_old_rotation_source(MultiFab& source, MultiFab& state_in, Real if (verbose > 1) { const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::construct_old_rotation_source() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_old_rotation_source() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -89,13 +91,13 @@ Castro::construct_new_rotation_source(MultiFab& source, MultiFab& state_old, Mul { const int IOProc = ParallelDescriptor::IOProcessorNumber(); Real run_time = ParallelDescriptor::second() - strt_time; - + Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << 
"Castro::construct_new_rotation_source() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::construct_new_rotation_source() time = " << run_time << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/Source/sdc/sdc_react_util.H b/Source/sdc/sdc_react_util.H index dffd1dd1f0..b174dd8d03 100644 --- a/Source/sdc/sdc_react_util.H +++ b/Source/sdc/sdc_react_util.H @@ -35,7 +35,7 @@ single_zone_react_source(burn_t& burn_state, eos(eos_input_re, burn_state); // eos_get_small_temp(&small_temp); - burn_state.T = amrex::min(MAX_TEMP, amrex::max(burn_state.T, small_temp)); + burn_state.T = std::clamp(burn_state.T, small_temp, integrator_rp::MAX_TEMP); Array1D ydot; diff --git a/Source/sources/Castro_geom.cpp b/Source/sources/Castro_geom.cpp index 282c52b3b1..e3c92732e0 100644 --- a/Source/sources/Castro_geom.cpp +++ b/Source/sources/Castro_geom.cpp @@ -33,8 +33,8 @@ Castro::construct_old_geom_source(MultiFab& source, MultiFab& state_in, Real tim if (verbose > 1) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { diff --git a/Source/sources/Castro_sources.cpp b/Source/sources/Castro_sources.cpp index 9a0be7b695..1c4981a7db 100644 --- a/Source/sources/Castro_sources.cpp +++ b/Source/sources/Castro_sources.cpp @@ -159,14 +159,16 @@ Castro::do_old_sources( if (verbose > 0) { const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::do_old_sources() time = " << 
run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::do_old_sources() time = " << run_time + << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif @@ -242,15 +244,16 @@ Castro::do_new_sources( if (verbose > 0) { - const int IOProc = ParallelDescriptor::IOProcessorNumber(); - Real run_time = ParallelDescriptor::second() - strt_time; + const int IOProc = ParallelDescriptor::IOProcessorNumber(); + amrex::Real run_time = ParallelDescriptor::second() - strt_time; + amrex::Real llevel = level; #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); - amrex::Print() << "Castro::do_new_sources() time = " << run_time << " on level " << level << "\n" << "\n"; + amrex::Print() << "Castro::do_new_sources() time = " << run_time << " on level " << llevel << "\n" << "\n"; #ifdef BL_LAZY }); #endif diff --git a/external/Microphysics b/external/Microphysics index c2795ec391..14b8b0e317 160000 --- a/external/Microphysics +++ b/external/Microphysics @@ -1 +1 @@ -Subproject commit c2795ec391217dd0bb7d56044eaccac128d5112f +Subproject commit 14b8b0e3173041968943d4bbac2c4803a33abceb diff --git a/external/amrex b/external/amrex index 672e7bcd0d..ac5dde35b6 160000 --- a/external/amrex +++ b/external/amrex @@ -1 +1 @@ -Subproject commit 672e7bcd0d1e0b66e0baf3a9ff7322bd01b3ec69 +Subproject commit ac5dde35b6c10f5d91e289edeff218bde84878a4