From eeb925209f7eb2be053fe1f78afa652e9cfca1e6 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 22 Aug 2024 10:15:52 +0200 Subject: [PATCH] [june24] ** COMPLETE JUNE24 ** rerun 30 tmad tests on itscrd90 on june24 branch - everything ok STARTED AT Wed Aug 21 11:17:50 PM CEST 2024 (SM tests) ENDED(1) AT Thu Aug 22 03:22:15 AM CEST 2024 [Status=0] (BSM tests) ENDED(1) AT Thu Aug 22 03:33:50 AM CEST 2024 [Status=0] 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 24 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt eemumu MEK processed 8192 events across 2 channels { 1 : 8192 } eemumu MEK processed 90112 events across 2 channels { 1 : 90112 } ggttggg MEK processed 8192 events across 1240 channels { 1 : 8192 } ggttggg MEK processed 90112 events across 1240 channels { 1 : 90112 } ggttgg MEK processed 8192 events across 123 channels { 112 : 8192 } ggttgg MEK processed 90112 events across 123 channels { 112 : 90112 } ggttg MEK processed 8192 events across 16 channels { 1 : 8192 } ggttg MEK processed 90112 events across 16 channels { 1 : 90112 } ggtt MEK processed 8192 events across 3 channels { 1 : 8192 } ggtt MEK processed 90112 events across 3 channels { 1 : 90112 } gqttq MEK processed 8192 events across 5 channels { 1 : 8192 } gqttq MEK processed 90112 events across 5 channels { 1 : 90112 } heftggbb MEK processed 8192 events across 4 channels { 1 : 8192 } heftggbb MEK processed 90112 events across 4 channels { 1 : 90112 } smeftggtttt MEK processed 8192 events across 72 channels { 1 : 8192 } smeftggtttt MEK processed 90112 events across 72 channels { 1 : 90112 } susyggt1t1 MEK processed 8192 events across 6 channels { 3 : 8192 } susyggt1t1 MEK processed 90112 events across 6 channels { 3 : 90112 } susyggtt MEK processed 8192 events across 3 channels { 1 : 8192 } susyggtt MEK processed 90112 events across 3 channels { 1 : 90112 } --- .../log_eemumu_mad_d_inl0_hrd0.txt | 226 ++++++++-------- .../log_eemumu_mad_f_inl0_hrd0.txt | 244 +++++++++-------- .../log_eemumu_mad_m_inl0_hrd0.txt | 234 ++++++++-------- .../log_ggtt_mad_d_inl0_hrd0.txt | 230 ++++++++-------- .../log_ggtt_mad_f_inl0_hrd0.txt | 230 ++++++++-------- .../log_ggtt_mad_m_inl0_hrd0.txt | 234 ++++++++-------- .../log_ggttg_mad_d_inl0_hrd0.txt | 242 +++++++++-------- .../log_ggttg_mad_f_inl0_hrd0.txt | 240 ++++++++-------- .../log_ggttg_mad_m_inl0_hrd0.txt | 232 ++++++++-------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 246 +++++++++-------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 255 ++++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 248 +++++++++-------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 256 +++++++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 248 +++++++++-------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 254 ++++++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 234 ++++++++-------- .../log_gqttq_mad_f_inl0_hrd0.txt | 236 ++++++++-------- .../log_gqttq_mad_m_inl0_hrd0.txt | 236 ++++++++-------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 228 ++++++++-------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 84 +++--- .../log_heftggbb_mad_m_inl0_hrd0.txt | 243 +++++++++-------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 246 +++++++++-------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 236 ++++++++-------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 246 +++++++++-------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 228 ++++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 236 ++++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 234 ++++++++-------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 234 ++++++++-------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 236 ++++++++-------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 234 ++++++++-------- 30 files changed, 3680 insertions(+), 3330 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 01107f564b..daecd2c094 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:42:55 +DATE: 2024-08-21_23:21:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6950s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6868s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 3837 events (found 8192 events) + [COUNTERS] PROGRAM TOTAL : 0.6967s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6893s + [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1693s - [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1740s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1662s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s - [COUNTERS] Fortran MEs ( 1 ) : 0.0835s for 90112 events => throughput is 1.08E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3694s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2869s + [COUNTERS] Fortran MEs ( 1 ) : 0.0826s for 90112 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661545E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1777s + [XSECTION] Cross section = 0.09367 [9.3673952392424320E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1776s [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.16E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661545E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424320E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 90112 events => throughput is 1.18E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370556E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3688s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2914s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0771s for 90112 events => throughput is 1.17E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370556E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.167196e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.156520e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.165900e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175361e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1704s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1705s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424348E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3353s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 90112 events => throughput is 1.94E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370556E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3344s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0462s for 90112 events => throughput is 1.95E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370556E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.918558e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.926932e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.023579e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972682e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1693s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.45E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424334E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3295s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2928s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 90112 events => throughput is 2.48E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3301s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2941s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 90112 events => throughput is 2.53E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370570E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.640473e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.534029e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.831088e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.718709e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1718s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1706s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424334E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3209s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2867s + [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3323s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2981s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0338s for 90112 events => throughput is 2.66E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370570E-002) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.678759e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.675163e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.813366e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.769101e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1692s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0040s for 8192 events => throughput is 2.04E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1729s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1686s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.10E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424334E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000739E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3322s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0405s for 90112 events => throughput is 2.22E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3337s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2916s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0417s for 90112 events => throughput is 2.16E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602020000739E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370570E-002) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.108602e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.080383e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.253882e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.174454e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6096s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6084s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.32E+07 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.6032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.85E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715404661532E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.3673952392424348E-002) and cuda (9.3673952392424348E-002) differ by less than 3E-14 (0.0) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000753E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7166s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.84E+07 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.7563s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 90112 events => throughput is 1.15E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602020000753E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1552612798370570E-002) and cuda (9.1552612798370570E-002) differ by less than 3E-14 (0.0) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.377977e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.292897e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.939853e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688587e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.088090e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.531836e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.478718e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.913745e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.243737e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521901e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.989285e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.905057e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.238682e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.526656e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.131222e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.185541e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 617aae1ec8..5a284b3bb2 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' - -make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:43:11 +DATE: 2024-08-21_23:21:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7259s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7175s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.72E+05 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 3837 events (found 8192 events) + [COUNTERS] PROGRAM TOTAL : 0.6860s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6783s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1878s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1797s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1677s + [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3875s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3018s - [COUNTERS] Fortran MEs ( 1 ) : 0.0857s for 90112 events => throughput is 1.05E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3725s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2891s + [COUNTERS] Fortran MEs ( 1 ) : 0.0834s for 90112 events => throughput is 1.08E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382703205998396E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673940164823388E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1718s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1647s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.20E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382703205998396E-002) differ by less than 4E-4 (1.306308462512007e-07) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673940164823388E-002) differ by less than 4E-4 (1.3053362912796018e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515590123565249E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3784s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3020s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0761s for 90112 events => throughput is 1.18E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552600830551153E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3581s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0733s for 90112 events => throughput is 1.23E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515590123565249E-002) differ by less than 4E-4 (1.2999352305698153e-07) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552600830551153E-002) differ by less than 4E-4 (1.3072067583941305e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.232262e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.213993e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.234403e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231490e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382700723828302E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1808s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09367 [9.3673937587540376E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1714s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1683s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.84E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700723828302E-002) differ by less than 4E-4 (1.5721146218172777e-07) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673937587540376E-002) differ by less than 4E-4 (1.5804696607002455e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515587612890761E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2977s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0297s for 90112 events => throughput is 3.03E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552598352995826E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2858s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0286s for 90112 events => throughput is 3.15E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587612890761E-002) differ by less than 4E-4 (1.5742791048545257e-07) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552598352995826E-002) differ by less than 4E-4 (1.5778222273166165e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.119755e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.102860e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.282267e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.301247e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1827s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [XSECTION] Cross section = 0.09367 [9.3673937665039383E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1687s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.35E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673937665039383E-002) differ by less than 4E-4 (1.5721963886328183e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3317s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3038s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 90112 events => throughput is 3.26E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552598384447972E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2893s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0266s for 90112 events => throughput is 3.39E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552598384447972E-002) differ by less than 4E-4 (1.5743868098105906e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.481016e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.421541e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.570800e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.575027e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382700679354239E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.35E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [XSECTION] Cross section = 0.09367 [9.3673937665039383E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1720s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382700679354239E-002) differ by less than 4E-4 (1.576877179942926e-07) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673937665039383E-002) differ by less than 4E-4 (1.5721963886328183e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515587619408464E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3314s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 90112 events => throughput is 3.33E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09155 [9.1552598384447972E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3191s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0263s for 90112 events => throughput is 3.42E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515587619408464E-002) differ by less than 4E-4 (1.573566908996682e-07) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552598384447972E-002) differ by less than 4E-4 (1.5743868098105906e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.644439e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.471070e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.697078e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660300e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382704335459282E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1845s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1814s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.04E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673941354609866E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1701s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1674s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.47E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382704335459282E-002) differ by less than 4E-4 (1.1853587900123586e-07) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673941354609866E-002) differ by less than 4E-4 (1.1783227038542066e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515591296252558E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3372s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0290s for 90112 events => throughput is 3.10E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552602074172512E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2929s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 90112 events => throughput is 3.36E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515591296252558E-002) differ by less than 4E-4 (1.1717945325173673e-07) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552602074172512E-002) differ by less than 4E-4 (1.1713699621385132e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.387501e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.470369e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.616268e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.517383e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382706077425631E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.6084s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6073s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.48E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.09367 [9.3673942834136617E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.6017s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5984s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382706077425631E-002) differ by less than 4E-4 (9.988182347875352e-08) +OK! xsec from fortran (9.3673952392424348E-002) and cuda (9.3673942834136617E-002) differ by less than 4E-4 (1.0203783962214885e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515592892887687E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7292s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7238s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 90112 events => throughput is 1.86E+07 events/s + [XSECTION] Cross section = 0.09155 [9.1552603722717646E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.7272s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7192s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 90112 events => throughput is 1.22E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515592892887687E-002) differ by less than 4E-4 (9.973286385633884e-08) +OK! xsec from fortran (9.1552612798370570E-002) and cuda (9.1552603722717646E-002) differ by less than 4E-4 (9.913046328247077e-08) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.601368e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.549092e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718163e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.631236e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.633474e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.819075e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.898384e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.234401e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.829286e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.780965e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.104797e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.267656e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.012752e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.364732e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.802072e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.895706e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index e51bbf394d..96c3ce60f2 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,15 +1,14 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make USEBUILDDIR=1 BACKEND=cppavx2 make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' @@ -17,6 +16,7 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:43:26 +DATE: 2024-08-21_23:22:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 3798 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6983s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6906s - [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 3837 events (found 8192 events) + [COUNTERS] PROGRAM TOTAL : 0.6850s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6775s + [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715404661532E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1791s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1777s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1699s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602020000766E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3694s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2869s - [COUNTERS] Fortran MEs ( 1 ) : 0.0825s for 90112 events => throughput is 1.09E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3709s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s + [COUNTERS] Fortran MEs ( 1 ) : 0.0821s for 90112 events => throughput is 1.10E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715420701395E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1846s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952389194196E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1776s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1699s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701395E-002) differ by less than 2E-4 (1.7176482458580722e-10) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952389194196E-002) differ by less than 2E-4 (3.448297203334505e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3660s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0792s for 90112 events => throughput is 1.14E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09155 [9.1552612788773982E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3753s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0787s for 90112 events => throughput is 1.15E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002) differ by less than 2E-4 (1.4292744765498355e-10) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612788773982E-002) differ by less than 2E-4 (1.0482048562465707e-10) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.124575e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.139138e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154252e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154121e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715420701354E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1757s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.88E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952389194196E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.91E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715420701354E-002) differ by less than 2E-4 (1.7176438049659737e-10) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952389194196E-002) differ by less than 2E-4 (3.448297203334505e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602033080859E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) + [XSECTION] Cross section = 0.09155 [9.1552612788773982E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) [COUNTERS] PROGRAM TOTAL : 0.3336s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2878s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 90112 events => throughput is 1.98E+06 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0446s for 90112 events => throughput is 2.02E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602033080859E-002) differ by less than 2E-4 (1.4292744765498355e-10) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612788773982E-002) differ by less than 2E-4 (1.0482048562465707e-10) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.982594e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991362e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.052848e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.089961e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1749s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.31E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1716s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.51E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952388695372E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3282s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0358s for 90112 events => throughput is 2.51E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3291s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2932s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 90112 events => throughput is 2.53E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612782299689E-002) differ by less than 2E-4 (1.7553714037887858e-10) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.552156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.597451e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.649390e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.673102e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1744s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1753s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1718s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952388695372E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3217s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2876s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0337s for 90112 events => throughput is 2.67E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2975s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 90112 events => throughput is 2.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612782299689E-002) differ by less than 2E-4 (1.7553714037887858e-10) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.650509e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.603419e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.719714e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.725562e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715383664494E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.1750s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.36E+06 events/s + [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.1743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1698s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cpp (9.3382715383664494E-002) differ by less than 2E-4 (2.2484925032983938e-10) +OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952388695372E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602022697845E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.3264s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2866s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0395s for 90112 events => throughput is 2.28E+06 events/s + [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.3345s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2949s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0393s for 90112 events => throughput is 2.29E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cpp (9.1515602022697845E-002) differ by less than 2E-4 (2.947131427788463e-11) +OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612782299689E-002) differ by less than 2E-4 (1.7553714037887858e-10) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.207219e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.166467e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.300574e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.338327e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09338 [9.3382715392009194E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1591 events (found 1595 events) - [COUNTERS] PROGRAM TOTAL : 0.5992s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5980s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.38E+07 events/s + [XSECTION] Cross section = 0.09367 [9.3673952381938416E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1566 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 0.6018s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5983s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.89E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3382715404661532E-002) and cuda (9.3382715392009194E-002) differ by less than 2E-4 (1.3548906441229747e-10) +OK! xsec from fortran (9.3673952392424348E-002) and cuda (9.3673952381938416E-002) differ by less than 2E-4 (1.1194078997078805e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09152 [9.1515602021089631E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1782 events (found 1787 events) - [COUNTERS] PROGRAM TOTAL : 0.7158s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7101s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 90112 events => throughput is 1.80E+07 events/s + [XSECTION] Cross section = 0.09155 [9.1552612789338281E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1803 events (found 1808 events) + [COUNTERS] PROGRAM TOTAL : 0.7437s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7353s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.16E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1515602020000766E-002) and cuda (9.1515602021089631E-002) differ by less than 2E-4 (1.1898038110302878e-11) +OK! xsec from fortran (9.1552612798370570E-002) and cuda (9.1552612789338281E-002) differ by less than 2E-4 (9.865686045884559e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.054665e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.272692e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.970842e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763158e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.242307e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.531744e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.491734e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.899178e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.221256e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.370218e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.104459e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855970e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.208981e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.547442e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.160987e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170954e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 8d24f348d7..b0114d5758 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -2,21 +2,21 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:43:42 +DATE: 2024-08-21_23:22:17 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 - [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8083s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7667s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [UNWEIGHT] Wrote 2613 events (found 5374 events) + [COUNTERS] PROGRAM TOTAL : 0.7737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7331s + [COUNTERS] Fortran MEs ( 1 ) : 0.0406s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4194s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3777s - [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4005s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3601s + [COUNTERS] Fortran MEs ( 1 ) : 0.0403s for 8192 events => throughput is 2.03E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7491s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2980s - [COUNTERS] Fortran MEs ( 1 ) : 0.4511s for 90112 events => throughput is 2.00E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.6712s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2200s + [COUNTERS] Fortran MEs ( 1 ) : 0.4512s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4196s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4033s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3595s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.190399993877946) and cpp (47.190399993877946) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989099] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7813s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4811s for 90112 events => throughput is 1.87E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.7381s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2634s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4742s for 90112 events => throughput is 1.90E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989099) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (47.156031786487532) and cpp (47.156031786487532) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.879822e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.862738e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.903748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.868889e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4107s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0250s for 8192 events => throughput is 3.28E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3888s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.48E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.190399993877946) and cpp (47.190399993877946) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989106] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5717s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3004s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2709s for 90112 events => throughput is 3.33E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.5417s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2732s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2681s for 90112 events => throughput is 3.36E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989106) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.156031786487532) and cpp (47.156031786487532) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.310019e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.313561e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.203674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.313013e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3916s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3633s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.190399993877946) and cpp (47.190399993877953) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4759s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3059s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1696s for 90112 events => throughput is 5.31E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487524] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.4320s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2636s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1680s for 90112 events => throughput is 5.36E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989135) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.156031786487532) and cpp (47.156031786487524) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.223657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.262826e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.200982e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.374834e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3751s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.88E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.190399993877946) and cpp (47.190399993877953) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4542s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3022s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1516s for 90112 events => throughput is 5.94E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487524] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.4316s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2798s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1514s for 90112 events => throughput is 5.95E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989135) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.156031786487532) and cpp (47.156031786487524) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.865744e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.702530e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.035557e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.901849e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756647] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4098s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 8192 events => throughput is 3.43E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094184803756647) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.190399993877946) and cpp (47.190399993877953) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989135] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5428s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3039s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 90112 events => throughput is 3.78E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487524] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.4896s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2568s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2323s for 90112 events => throughput is 3.88E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105695279989135) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.156031786487532) and cpp (47.156031786487524) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.669812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.628731e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.898434e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.697546e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8047s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8033s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.27E+07 events/s + [XSECTION] Cross section = 47.19 [47.190399993877953] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.7972s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7935s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.86E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cuda (47.094184803756640) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.190399993877946) and cuda (47.190399993877953) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989121] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7304s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7231s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s + [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.7034s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6933s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 90112 events => throughput is 9.71E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cuda (47.105695279989121) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.156031786487532) and cuda (47.156031786487532) differ by less than 3E-14 (0.0) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.008892e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.145019e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.654647e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420127e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.331472e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.903378e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082448e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.708186e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.310542e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.903438e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160861e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.815730e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.331806e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.891168e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.063253e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.716990e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 420861126b..5bec457add 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:44:09 +DATE: 2024-08-21_23:22:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 - [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8019s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7604s - [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [UNWEIGHT] Wrote 2613 events (found 5374 events) + [COUNTERS] PROGRAM TOTAL : 0.7830s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7417s + [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4215s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3800s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4008s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3603s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7567s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3058s - [COUNTERS] Fortran MEs ( 1 ) : 0.4510s for 90112 events => throughput is 2.00E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.6651s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2180s + [COUNTERS] Fortran MEs ( 1 ) : 0.4471s for 90112 events => throughput is 2.02E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094179692708323] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3790s + [XSECTION] Cross section = 47.19 [47.190394119386738] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4000s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3587s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094179692708323) differ by less than 4E-4 (1.0852822573959031e-07) +OK! xsec from fortran (47.190399993877946) and cpp (47.190394119386738) differ by less than 4E-4 (1.2448487851646206e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105688388783328] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7678s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4582s for 90112 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 47.16 [47.156028163566589] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.7067s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4485s for 90112 events => throughput is 2.01E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105688388783328) differ by less than 4E-4 (1.462924120732012e-07) +OK! xsec from fortran (47.156031786487532) and cpp (47.156028163566589) differ by less than 4E-4 (7.682836755673605e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984608e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.030791e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996032e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018545e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094175707109216] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3923s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s + [XSECTION] Cross section = 47.19 [47.190390126085290] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3749s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094175707109216) differ by less than 4E-4 (1.9315861321533845e-07) +OK! xsec from fortran (47.190399993877946) and cpp (47.190390126085290) differ by less than 4E-4 (2.091059337905321e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684583433771] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4893s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3053s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1837s for 90112 events => throughput is 4.90E+05 events/s + [XSECTION] Cross section = 47.16 [47.156024287692041] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.4396s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1834s for 90112 events => throughput is 4.91E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105684583433771) differ by less than 4E-4 (2.2707562807866566e-07) +OK! xsec from fortran (47.156031786487532) and cpp (47.156024287692041) differ by less than 4E-4 (1.5902091854425038e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.831484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.745031e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.765454e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.740555e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3779s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 8192 events => throughput is 8.99E+05 events/s + [XSECTION] Cross section = 47.19 [47.190390514620915] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3675s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.45E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094173726920275) differ by less than 4E-4 (2.3520603253945893e-07) +OK! xsec from fortran (47.190399993877946) and cpp (47.190390514620915) differ by less than 4E-4 (2.008725722424387e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4091s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3116s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0972s for 90112 events => throughput is 9.27E+05 events/s + [XSECTION] Cross section = 47.16 [47.156022338885265] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.3593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2629s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0961s for 90112 events => throughput is 9.38E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105684037363524) differ by less than 4E-4 (2.386680745258829e-07) +OK! xsec from fortran (47.156031786487532) and cpp (47.156022338885265) differ by less than 4E-4 (2.0034769487864423e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.995090e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.314080e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.148417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.260538e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094173726920275] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3807s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s + [XSECTION] Cross section = 47.19 [47.190390514620915] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3676s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3590s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.99E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094173726920275) differ by less than 4E-4 (2.3520603253945893e-07) +OK! xsec from fortran (47.190399993877946) and cpp (47.190390514620915) differ by less than 4E-4 (2.008725722424387e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105684037363524] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.3961s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3040s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0917s for 90112 events => throughput is 9.82E+05 events/s + [XSECTION] Cross section = 47.16 [47.156022338885265] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.3552s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2643s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.94E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105684037363524) differ by less than 4E-4 (2.386680745258829e-07) +OK! xsec from fortran (47.156031786487532) and cpp (47.156022338885265) differ by less than 4E-4 (2.0034769487864423e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.994646e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.807528e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.882184e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.824773e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094178448427996] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3945s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3828s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0112s for 8192 events => throughput is 7.31E+05 events/s + [XSECTION] Cross section = 47.19 [47.190394861161103] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3636s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0115s for 8192 events => throughput is 7.10E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094178448427996) differ by less than 4E-4 (1.3494932904478674e-07) +OK! xsec from fortran (47.190399993877946) and cpp (47.190394861161103) differ by less than 4E-4 (1.0876612288601706e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105688391432061] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5017s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3657s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1356s for 90112 events => throughput is 6.64E+05 events/s + [XSECTION] Cross section = 47.16 [47.156026777408194] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.3813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1242s for 90112 events => throughput is 7.26E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105688391432061) differ by less than 4E-4 (1.462361824966507e-07) +OK! xsec from fortran (47.156031786487532) and cpp (47.156026777408194) differ by less than 4E-4 (1.0622351265254792e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.837763e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.802954e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.925566e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.242807e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184162782994] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8112s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8099s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 47.19 [47.190400428492907] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.7963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7928s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.80E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cuda (47.094184162782994) differ by less than 4E-4 (1.3610462645807786e-08) +OK! xsec from fortran (47.190399993877946) and cuda (47.190400428492907) differ by less than 4E-4 (9.209817353195149e-09) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105694501043516] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7829s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 90112 events => throughput is 1.58E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 47.16 [47.156031802494475] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.10E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cuda (47.105694501043516) differ by less than 4E-4 (1.6536123581545326e-08) +OK! xsec from fortran (47.156031786487532) and cuda (47.156031802494475) differ by less than 4E-4 (3.394462488870431e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.085941e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.320560e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.178660e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.681991e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.983696e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.939579e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.406286e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.369535e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.010543e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.929730e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.536473e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.387560e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.527299e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.692994e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.475317e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.052520e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 65f004f30e..c865bf6334 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -3,9 +3,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -18,11 +18,11 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:44:34 +DATE: 2024-08-21_23:23:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 - [UNWEIGHT] Wrote 2601 events (found 5405 events) - [COUNTERS] PROGRAM TOTAL : 0.8115s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7704s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [UNWEIGHT] Wrote 2613 events (found 5374 events) + [COUNTERS] PROGRAM TOTAL : 0.7835s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7423s + [COUNTERS] Fortran MEs ( 1 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184803756640] fbridge_mode=0 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4214s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3805s - [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3977s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3580s + [COUNTERS] Fortran MEs ( 1 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279989114] fbridge_mode=0 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7670s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3128s - [COUNTERS] Fortran MEs ( 1 ) : 0.4542s for 90112 events => throughput is 1.98E+05 events/s + [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.6693s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2202s + [COUNTERS] Fortran MEs ( 1 ) : 0.4491s for 90112 events => throughput is 2.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4222s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3775s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s + [XSECTION] Cross section = 47.19 [47.190401334262738] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4118s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3674s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0439s for 8192 events => throughput is 1.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) +OK! xsec from fortran (47.190399993877946) and cpp (47.190401334262738) differ by less than 2E-4 (2.8403760010675683e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696630006634] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7889s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3008s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4877s for 90112 events => throughput is 1.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.16 [47.156033127698386] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.7494s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2656s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4833s for 90112 events => throughput is 1.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006634) differ by less than 2E-4 (2.865932691165085e-08) +OK! xsec from fortran (47.156031786487532) and cpp (47.156033127698386) differ by less than 2E-4 (2.844197877216459e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.863098e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.886135e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.876650e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.870290e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186141863901] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4042s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3795s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.38E+05 events/s + [XSECTION] Cross section = 47.19 [47.190401334262724] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3979s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186141863901) differ by less than 2E-4 (2.8413428720952538e-08) +OK! xsec from fortran (47.190399993877946) and cpp (47.190401334262724) differ by less than 2E-4 (2.8403759566586473e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696630006626] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5750s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2680s for 90112 events => throughput is 3.36E+05 events/s + [XSECTION] Cross section = 47.16 [47.156033127698386] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.6689s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2845s for 90112 events => throughput is 3.17E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696630006626) differ by less than 2E-4 (2.8659326689606246e-08) +OK! xsec from fortran (47.156031786487532) and cpp (47.156033127698386) differ by less than 2E-4 (2.844197877216459e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.334875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333157e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.372227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.262953e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3946s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3794s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0147s for 8192 events => throughput is 5.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4002s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) +OK! xsec from fortran (47.190399993877946) and cpp (47.190401377780539) differ by less than 2E-4 (2.932593479165746e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4696s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3034s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1658s for 90112 events => throughput is 5.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.5138s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1763s for 90112 events => throughput is 5.11E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) +OK! xsec from fortran (47.156031786487532) and cpp (47.156033163835552) differ by less than 2E-4 (2.9208310436956708e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.223051e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.326444e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.767945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.413178e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4019s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.78E+05 events/s + [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3735s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3597s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0134s for 8192 events => throughput is 6.13E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) +OK! xsec from fortran (47.190399993877946) and cpp (47.190401377780539) differ by less than 2E-4 (2.932593479165746e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.4595s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3077s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1514s for 90112 events => throughput is 5.95E+05 events/s + [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.4104s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1487s for 90112 events => throughput is 6.06E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) +OK! xsec from fortran (47.156031786487532) and cpp (47.156033163835552) differ by less than 2E-4 (2.9208310436956708e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.889622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.974222e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.919078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.003657e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094186169585456] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4002s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3783s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0214s for 8192 events => throughput is 3.82E+05 events/s + [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3819s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cpp (47.094186169585456) differ by less than 2E-4 (2.9002069412698006e-08) +OK! xsec from fortran (47.190399993877946) and cpp (47.190401377780539) differ by less than 2E-4 (2.932593479165746e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105696663215774] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.5451s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3093s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2354s for 90112 events => throughput is 3.83E+05 events/s + [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.4866s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2541s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2320s for 90112 events => throughput is 3.88E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cpp (47.105696663215774) differ by less than 2E-4 (2.9364318976377035e-08) +OK! xsec from fortran (47.156031786487532) and cpp (47.156033163835552) differ by less than 2E-4 (2.9208310436956708e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.737875e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.645050e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.863403e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.721943e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.09 [47.094184798437830] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8029s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8014s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.26E+07 events/s + [XSECTION] Cross section = 47.19 [47.190399989386655] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.8086s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.094184803756640) and cuda (47.094184798437830) differ by less than 2E-4 (1.1293987967064822e-10) +OK! xsec from fortran (47.190399993877946) and cuda (47.190399989386655) differ by less than 2E-4 (9.517386878599154e-11) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.11 [47.105695279068492] fbridge_mode=1 - [UNWEIGHT] Wrote 1744 events (found 1749 events) - [COUNTERS] PROGRAM TOTAL : 1.7390s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7315s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [XSECTION] Cross section = 47.16 [47.156031785698381] fbridge_mode=1 + [UNWEIGHT] Wrote 1766 events (found 1771 events) + [COUNTERS] PROGRAM TOTAL : 1.7050s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6950s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 90112 events => throughput is 9.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.105695279989114) and cuda (47.105695279068492) differ by less than 2E-4 (1.954369999168648e-11) +OK! xsec from fortran (47.156031786487532) and cuda (47.156031785698381) differ by less than 2E-4 (1.6734835739384835e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.004360e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.257129e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.618155e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.443056e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.337805e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.916669e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.064726e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.643495e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.321717e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.757485e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.141622e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.018784e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.487761e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.883016e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.948699e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.725531e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index c52a8af2f9..e9ca264005 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:45:01 +DATE: 2024-08-21_23:23:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 - [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6887s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3666s - [COUNTERS] Fortran MEs ( 1 ) : 0.3221s for 8192 events => throughput is 2.54E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [UNWEIGHT] Wrote 387 events (found 1591 events) + [COUNTERS] PROGRAM TOTAL : 0.6765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3561s + [COUNTERS] Fortran MEs ( 1 ) : 0.3204s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6558s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3350s - [COUNTERS] Fortran MEs ( 1 ) : 0.3208s for 8192 events => throughput is 2.55E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.6348s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3158s + [COUNTERS] Fortran MEs ( 1 ) : 0.3190s for 8192 events => throughput is 2.57E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1103s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5412s - [COUNTERS] Fortran MEs ( 1 ) : 3.5692s for 90112 events => throughput is 2.52E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 5.0026s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4809s + [COUNTERS] Fortran MEs ( 1 ) : 3.5218s for 90112 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6762s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3380s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3370s for 8192 events => throughput is 2.43E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.6544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3348s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395722) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.2687s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5495s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7180s for 90112 events => throughput is 2.42E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 5.1821s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5214s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.6595s for 90112 events => throughput is 2.46E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.517328e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.507558e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.477316e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.547676e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607748863] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5207s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1801s for 8192 events => throughput is 4.55E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806079395720] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.5005s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1740s for 8192 events => throughput is 4.71E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607748863) differ by less than 3E-14 (2.453592884421596e-14) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395720) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717666E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.4936s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5370s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9559s for 90112 events => throughput is 4.61E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 3.4404s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5166s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.9230s for 90112 events => throughput is 4.69E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717666E-002) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.723167e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.760284e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.710741e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.818037e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4289s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3383s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0899s for 8192 events => throughput is 9.11E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.1056 [0.10562806079395723] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.4049s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0858s for 8192 events => throughput is 9.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395723) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.5415s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5644s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9765s for 90112 events => throughput is 9.23E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.5018s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9696s for 90112 events => throughput is 9.29E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.063994e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.496117e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.113779e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.659471e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4521s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3684s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0831s for 8192 events => throughput is 9.86E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.1056 [0.10562806079395723] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.3950s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3153s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0791s for 8192 events => throughput is 1.04E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395723) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.4440s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5615s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8818s for 90112 events => throughput is 1.02E+05 events/s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.3923s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8670s for 90112 events => throughput is 1.04E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.056563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.025384e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.066565e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.032273e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749110] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4498s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3385s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1106s for 8192 events => throughput is 7.41E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.1056 [0.10562806079395724] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.4508s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1164s for 8192 events => throughput is 7.03E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748607749110) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395724) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7606s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5479s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2119s for 90112 events => throughput is 7.44E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.7977s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5634s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2335s for 90112 events => throughput is 7.31E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238481932717722E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.524660e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.461868e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.502357e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.652849e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.8444s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8355s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0059s for 8192 events => throughput is 1.38E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.7658s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 8192 events => throughput is 8.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748607749111) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.10562806079395722) and cuda (0.10562806079395722) differ by less than 3E-14 (0.0) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717736E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9827s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.86E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [XSECTION] Cross section = 0.07995 [7.9946447903217269E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 1.9812s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9510s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 90112 events => throughput is 3.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481932717736E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9946447903217255E-002) and cuda (7.9946447903217269E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.637288e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.138759e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.243124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.564735e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.002014e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.481528e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.239487e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.163059e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.002136e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.454061e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.250655e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166559e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.001900e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.473246e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.746731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.640891e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index b25cff31e4..17e9f17d1d 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:45:43 +DATE: 2024-08-21_23:24:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 - [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6879s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s - [COUNTERS] Fortran MEs ( 1 ) : 0.3221s for 8192 events => throughput is 2.54E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [UNWEIGHT] Wrote 387 events (found 1591 events) + [COUNTERS] PROGRAM TOTAL : 0.7128s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s + [COUNTERS] Fortran MEs ( 1 ) : 0.3361s for 8192 events => throughput is 2.44E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6575s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3322s - [COUNTERS] Fortran MEs ( 1 ) : 0.3252s for 8192 events => throughput is 2.52E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.6616s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3241s + [COUNTERS] Fortran MEs ( 1 ) : 0.3375s for 8192 events => throughput is 2.43E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.0903s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5245s - [COUNTERS] Fortran MEs ( 1 ) : 3.5658s for 90112 events => throughput is 2.53E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 5.0043s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4854s + [COUNTERS] Fortran MEs ( 1 ) : 3.5189s for 90112 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112722616246457] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6630s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3273s for 8192 events => throughput is 2.50E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562804416188390] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.6377s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3189s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3177s for 8192 events => throughput is 2.58E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112722616246457) differ by less than 4E-4 (2.570171934723753e-06) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562804416188390) differ by less than 4E-4 (1.57458853178305e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238468293717765E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1318s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5454s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5854s for 90112 events => throughput is 2.51E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946434556369253E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 5.0665s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5183s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5472s for 90112 events => throughput is 2.54E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238468293717765E-002) differ by less than 4E-4 (1.721259623721494e-07) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946434556369253E-002) differ by less than 4E-4 (1.6694735482936096e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.562809e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.550863e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.549301e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.559205e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112720694019242] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4414s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0997s for 8192 events => throughput is 8.22E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562802510294199] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.4183s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3178s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1000s for 8192 events => throughput is 8.20E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112720694019242) differ by less than 4E-4 (2.760251535116609e-06) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562802510294199) differ by less than 4E-4 (3.37893311330717e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238454783817719E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.6571s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5548s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1018s for 90112 events => throughput is 8.18E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946421150520988E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.5899s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5081s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0813s for 90112 events => throughput is 8.33E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238454783817719E-002) differ by less than 4E-4 (3.4262266690454624e-07) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946421150520988E-002) differ by less than 4E-4 (3.3463270687761337e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.333170e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.459907e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.397937e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.487595e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3825s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3366s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 8192 events => throughput is 1.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.1056 [0.10562802567355024] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.3603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721757974454) differ by less than 4E-4 (2.655042234289695e-06) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562802567355024) differ by less than 4E-4 (3.324912595248364e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0649s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5077s for 90112 events => throughput is 1.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.07995 [7.9946420380865552E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.0104s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5115s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4984s for 90112 events => throughput is 1.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453732924513E-002) differ by less than 4E-4 (3.558850765195132e-07) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946420380865552E-002) differ by less than 4E-4 (3.4425984429820033e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.821951e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.685219e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.834362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.818527e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112721757974454] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3381s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [XSECTION] Cross section = 0.1056 [0.10562802567355024] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.3597s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112721757974454) differ by less than 4E-4 (2.655042234289695e-06) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562802567355024) differ by less than 4E-4 (3.324912595248364e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238453732924513E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.0303s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4587s for 90112 events => throughput is 1.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.07995 [7.9946420380865552E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 1.9605s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5088s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4512s for 90112 events => throughput is 2.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238453732924513E-002) differ by less than 4E-4 (3.558850765195132e-07) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946420380865552E-002) differ by less than 4E-4 (3.4425984429820033e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.018262e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.996828e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.019326e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.012125e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112723389095883] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.3929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0549s for 8192 events => throughput is 1.49E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.1056 [0.10562804211436801] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.3711s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3174s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0531s for 8192 events => throughput is 1.54E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112723389095883) differ by less than 4E-4 (2.493748653908945e-06) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562804211436801) differ by less than 4E-4 (1.768430573090285e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238464413054557E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.1189s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5295s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5889s for 90112 events => throughput is 1.53E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.07995 [7.9946430964077192E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.0898s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5144s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5748s for 90112 events => throughput is 1.57E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238464413054557E-002) differ by less than 4E-4 (2.2110043929046697e-07) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946430964077192E-002) differ by less than 4E-4 (2.1188108423331187e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.561264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.539215e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.545662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.581223e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112725654777677] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7590s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0010s for 8192 events => throughput is 8.12E+06 events/s + [XSECTION] Cross section = 0.1056 [0.10562804625987131] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.7551s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.44E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112725654777677) differ by less than 4E-4 (2.269706518509551e-06) +OK! xsec from fortran (0.10562806079395722) and cuda (0.10562804625987131) differ by less than 4E-4 (1.375968260441951e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238470908598507E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9627s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9510s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0105s for 90112 events => throughput is 8.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [XSECTION] Cross section = 0.07995 [7.9946440545672862E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 1.9554s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9408s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0135s for 90112 events => throughput is 6.70E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238470908598507E-002) differ by less than 4E-4 (1.3912582552677577e-07) +OK! xsec from fortran (7.9946447903217255E-002) and cuda (7.9946440545672862E-002) differ by less than 4E-4 (9.203091055898227e-08) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.151184e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.796399e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.548948e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.004982e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.576425e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.315412e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.715469e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.200957e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.585156e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.304813e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.753005e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.253188e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.440113e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.204910e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.293588e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.257284e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index b6592dfe65..10937dc1fa 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:46:20 +DATE: 2024-08-21_23:24:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 - [UNWEIGHT] Wrote 365 events (found 1496 events) - [COUNTERS] PROGRAM TOTAL : 0.6929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s - [COUNTERS] Fortran MEs ( 1 ) : 0.3227s for 8192 events => throughput is 2.54E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [UNWEIGHT] Wrote 387 events (found 1591 events) + [COUNTERS] PROGRAM TOTAL : 0.6814s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3607s + [COUNTERS] Fortran MEs ( 1 ) : 0.3207s for 8192 events => throughput is 2.55E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748607749111] fbridge_mode=0 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6641s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3385s - [COUNTERS] Fortran MEs ( 1 ) : 0.3256s for 8192 events => throughput is 2.52E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.6411s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3220s + [COUNTERS] Fortran MEs ( 1 ) : 0.3192s for 8192 events => throughput is 2.57E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481932717722E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.1698s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5570s - [COUNTERS] Fortran MEs ( 1 ) : 3.6128s for 90112 events => throughput is 2.49E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 5.0315s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4940s + [COUNTERS] Fortran MEs ( 1 ) : 3.5375s for 90112 events => throughput is 2.55E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748700702684] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.6766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3338s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3416s for 8192 events => throughput is 2.40E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806184450918] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.6699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3452s for 8192 events => throughput is 2.37E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700702684) differ by less than 2E-4 (9.191721828116783e-09) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806184450918) differ by less than 2E-4 (9.945765988561561e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482679400354E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 5.3154s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5455s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7687s for 90112 events => throughput is 2.39E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946448664873659E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 5.2814s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5219s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.7583s for 90112 events => throughput is 2.40E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482679400354E-002) differ by less than 2E-4 (9.423232416594374e-09) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448664873659E-002) differ by less than 2E-4 (9.527082456273206e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.463950e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.494235e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.478616e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.514100e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748702805033] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.5103s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1749s for 8192 events => throughput is 4.68E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806177750511] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.5093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1840s for 8192 events => throughput is 4.45E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748702805033) differ by less than 2E-4 (9.399612865834683e-09) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806177750511) differ by less than 2E-4 (9.311426296676473e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482683055667E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 3.4746s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5384s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9354s for 90112 events => throughput is 4.66E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946448673477319E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 3.4139s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5132s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8998s for 90112 events => throughput is 4.74E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482683055667E-002) differ by less than 2E-4 (9.469362849401364e-09) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448673477319E-002) differ by less than 2E-4 (9.634700370853011e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.832626e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.813034e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.815562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829750e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4266s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3394s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0865s for 8192 events => throughput is 9.47E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806158303427] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.3996s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3137s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0852s for 8192 events => throughput is 9.61E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806158303427) differ by less than 2E-4 (7.470335461334798e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.4911s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5269s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9636s for 90112 events => throughput is 9.35E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946448515023347E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.4604s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5166s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9431s for 90112 events => throughput is 9.55E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448515023347E-002) differ by less than 2E-4 (7.652698919002887e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.435081e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.744847e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.477580e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.734137e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748681415580] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4142s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.1056 [0.10562806158303427] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.3974s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0758s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748681415580) differ by less than 2E-4 (7.284514991212632e-09) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806158303427) differ by less than 2E-4 (7.470335461334798e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482534347232E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.3905s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5342s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8556s for 90112 events => throughput is 1.05E+05 events/s + [XSECTION] Cross section = 0.07995 [7.9946448515023347E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.3495s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5091s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8397s for 90112 events => throughput is 1.07E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482534347232E-002) differ by less than 2E-4 (7.592642958798024e-09) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448515023347E-002) differ by less than 2E-4 (7.652698919002887e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.087061e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.083861e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.088736e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.086982e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748700265108] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.4463s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3356s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1100s for 8192 events => throughput is 7.45E+04 events/s + [XSECTION] Cross section = 0.1056 [0.10562806177389659] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.4541s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3299s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1234s for 8192 events => throughput is 6.64E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cpp (0.10112748700265108) differ by less than 2E-4 (9.148451995955043e-09) +OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806177389659) differ by less than 2E-4 (9.277263846030337e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238482666076374E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 2.7724s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5419s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2297s for 90112 events => throughput is 7.33E+04 events/s + [XSECTION] Cross section = 0.07995 [7.9946448645092413E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 2.8259s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5507s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2743s for 90112 events => throughput is 7.07E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cpp (7.9238482666076374E-002) differ by less than 2E-4 (9.255082034087536e-09) +OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448645092413E-002) differ by less than 2E-4 (9.279651269622491e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.268797e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.329854e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.343356e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.409755e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1011 [0.10112748601943165] fbridge_mode=1 - [UNWEIGHT] Wrote 386 events (found 1179 events) - [COUNTERS] PROGRAM TOTAL : 0.7682s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7592s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [XSECTION] Cross section = 0.1056 [0.10562806076929508] fbridge_mode=1 + [UNWEIGHT] Wrote 391 events (found 1147 events) + [COUNTERS] PROGRAM TOTAL : 0.7637s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7514s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10112748607749111) and cuda (0.10112748601943165) differ by less than 2E-4 (5.74121417074025e-10) +OK! xsec from fortran (0.10562806079395722) and cuda (0.10562806076929508) differ by less than 2E-4 (2.334809012793926e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07924 [7.9238481937154381E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1898 events (found 1903 events) - [COUNTERS] PROGRAM TOTAL : 1.9875s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9612s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0233s for 90112 events => throughput is 3.86E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [XSECTION] Cross section = 0.07995 [7.9946447910357057E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1853 events (found 1858 events) + [COUNTERS] PROGRAM TOTAL : 1.9901s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9597s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0274s for 90112 events => throughput is 3.29E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9238481932717722E-002) and cuda (7.9238481937154381E-002) differ by less than 2E-4 (5.5991211667105745e-11) +OK! xsec from fortran (7.9946447903217255E-002) and cuda (7.9946447910357057E-002) differ by less than 2E-4 (8.930722827926729e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.654166e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.140804e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.808330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.592923e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.001990e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.426281e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.235577e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159222e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.000218e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.422944e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.245999e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170373e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.996930e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.438089e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.726284e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.627314e+06 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 9f965c04b5..28e1b89acf 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,10 +1,10 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cuda - +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -18,8 +18,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:47:02 +DATE: 2024-08-21_23:25:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 - [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.5167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2657s - [COUNTERS] Fortran MEs ( 1 ) : 4.2511s for 8192 events => throughput is 1.93E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [UNWEIGHT] Wrote 7 events (found 223 events) + [COUNTERS] PROGRAM TOTAL : 4.3916s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2504s + [COUNTERS] Fortran MEs ( 1 ) : 4.1412s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2643s - [COUNTERS] Fortran MEs ( 1 ) : 4.2223s for 8192 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 4.3963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2491s + [COUNTERS] Fortran MEs ( 1 ) : 4.1472s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 48.4461s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8443s - [COUNTERS] Fortran MEs ( 1 ) : 46.6018s for 90112 events => throughput is 1.93E+03 events/s + [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 47.5679s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7910s + [COUNTERS] Fortran MEs ( 1 ) : 45.7769s for 90112 events => throughput is 1.97E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.6404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2618s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3690s for 8192 events => throughput is 1.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0096s + [XSECTION] Cross section = 0.7806 [0.78063416981514666] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 4.5396s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2513s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.2798s for 8192 events => throughput is 1.91E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222236) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514666) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 49.9380s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7954s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.1336s for 90112 events => throughput is 1.87E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s + [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 49.2859s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8217s + [COUNTERS] CudaCpp MEs ( 2 ) : 47.4558s for 90112 events => throughput is 1.90E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926413e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960157e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.935484e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.963511e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222236] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.6125s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2606s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3472s for 8192 events => throughput is 3.49E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514688] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 2.5260s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2484s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2730s for 8192 events => throughput is 3.60E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0046s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222236) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514688) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099785] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 27.5257s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8027s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7180s for 90112 events => throughput is 3.50E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 27.1055s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8077s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.2930s for 90112 events => throughput is 3.56E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099785) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607687) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.649842e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.689861e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.636818e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.680815e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.2653s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2598s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0030s for 8192 events => throughput is 8.17E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 1.2438s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2537s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9875s for 8192 events => throughput is 8.30E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222231) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514699) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.8598s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7908s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.0665s for 90112 events => throughput is 8.14E+03 events/s + [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 12.7703s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8004s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.9675s for 90112 events => throughput is 8.22E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.344831e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.915334e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.416676e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.437730e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.1673s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2599s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9051s for 8192 events => throughput is 9.05E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 1.1432s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8855s for 8192 events => throughput is 9.25E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222231) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514699) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 11.7872s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8132s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.9717s for 90112 events => throughput is 9.04E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 11.6139s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8175s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.7941s for 90112 events => throughput is 9.20E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.472083e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.083425e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.534343e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.137309e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222231] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.3936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2589s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1317s for 8192 events => throughput is 7.24E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 1.4435s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2656s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1748s for 8192 events => throughput is 6.97E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556621222231) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514699) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099799] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 14.2691s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8171s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.4493s for 90112 events => throughput is 7.24E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 14.1169s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8159s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.2979s for 90112 events => throughput is 7.33E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083266099799) differ by less than 3E-14 (7.771561172376096e-16) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.935643e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.420117e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.348983e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.396267e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222225] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7693s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6983s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0363s for 8192 events => throughput is 2.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s + [XSECTION] Cross section = 0.7806 [0.78063416981514688] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 0.7556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6820s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0349s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cuda (0.46320556621222225) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.78063416981514677) and cuda (0.78063416981514688) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099782] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.6062s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2048s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3669s for 90112 events => throughput is 2.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0344s + [XSECTION] Cross section = 0.1915 [0.19154487411607690] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 2.6505s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3695s for 90112 events => throughput is 2.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0349s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cuda (0.22558083266099782) differ by less than 3E-14 (1.4432899320127035e-15) +OK! xsec from fortran (0.19154487411607687) and cuda (0.19154487411607690) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.290486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.144921e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.506388e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.350460e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.134196e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.133000e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.177921e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.158003e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.129278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126708e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.155764e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.181777e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126990e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.127323e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.446377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.425233e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index cd633f37c7..98cd036eed 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,26 +1,26 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 - -make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cpp512y + make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:51:00 +DATE: 2024-08-21_23:29:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 - [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.4959s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2635s - [COUNTERS] Fortran MEs ( 1 ) : 4.2323s for 8192 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [UNWEIGHT] Wrote 7 events (found 223 events) + [COUNTERS] PROGRAM TOTAL : 4.4044s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2499s + [COUNTERS] Fortran MEs ( 1 ) : 4.1545s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2631s - [COUNTERS] Fortran MEs ( 1 ) : 4.2156s for 8192 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 4.3930s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2487s + [COUNTERS] Fortran MEs ( 1 ) : 4.1443s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 48.4352s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8357s - [COUNTERS] Fortran MEs ( 1 ) : 46.5995s for 90112 events => throughput is 1.93E+03 events/s + [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 47.6072s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8001s + [COUNTERS] Fortran MEs ( 1 ) : 45.8071s for 90112 events => throughput is 1.97E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320716615478996] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.5354s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2660s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2605s for 8192 events => throughput is 1.92E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s + [XSECTION] Cross section = 0.7806 [0.78063782001975612] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 4.4437s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2484s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1867s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320716615478996) differ by less than 4E-4 (3.4540659359372228e-06) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063782001975612) differ by less than 4E-4 (4.67594777497915e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558162567940870] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 48.5468s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7982s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.7401s for 90112 events => throughput is 1.93E+03 events/s + [XSECTION] Cross section = 0.1915 [0.19154547896253576] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 47.8073s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7987s + [COUNTERS] CudaCpp MEs ( 2 ) : 46.0000s for 90112 events => throughput is 1.96E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558162567940870) differ by less than 4E-4 (3.5154512074697664e-06) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154547896253576) differ by less than 4E-4 (3.15772719927665e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +186,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996945e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.008295e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.982014e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975086e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +206,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320708851010073] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.4573s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2634s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1914s for 8192 events => throughput is 6.88E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [XSECTION] Cross section = 0.7806 [0.78063772189507497] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 1.4105s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2517s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1562s for 8192 events => throughput is 7.09E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320708851010073) differ by less than 4E-4 (3.2864412462529913e-06) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063772189507497) differ by less than 4E-4 (4.55024909951085e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +241,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558157380141428] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 14.6570s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7854s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8693s for 90112 events => throughput is 7.00E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [XSECTION] Cross section = 0.1915 [0.19154543829232032] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 14.5090s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8076s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.6988s for 90112 events => throughput is 7.10E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558157380141428) differ by less than 4E-4 (3.2854760192435606e-06) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154543829232032) differ by less than 4E-4 (2.9453998497963596e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +266,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.255598e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.254564e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.246435e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.282054e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +286,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7739s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2587s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5137s for 8192 events => throughput is 1.59E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [XSECTION] Cross section = 0.7806 [0.78063776840958710] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 0.7544s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2507s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5023s for 8192 events => throughput is 1.63E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320704806184321) differ by less than 4E-4 (3.199118769003917e-06) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063776840958710) differ by less than 4E-4 (4.609834644009325e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +321,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.4672s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7991s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.6666s for 90112 events => throughput is 1.59E+04 events/s + [XSECTION] Cross section = 0.1915 [0.19154546188078725] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 7.3776s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8033s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5729s for 90112 events => throughput is 1.62E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558158459897135) differ by less than 4E-4 (3.333341597855366e-06) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154546188078725) differ by less than 4E-4 (3.0685483654036005e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +346,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.606140e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.672010e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.576957e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.669560e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +366,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320704806184321] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7680s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2709s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4957s for 8192 events => throughput is 1.65E+04 events/s + [XSECTION] Cross section = 0.7806 [0.78063776840958710] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 0.7012s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4533s for 8192 events => throughput is 1.81E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320704806184321) differ by less than 4E-4 (3.199118769003917e-06) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063776840958710) differ by less than 4E-4 (4.609834644009325e-06) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +401,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558158459897135] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 6.7809s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7804s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9992s for 90112 events => throughput is 1.80E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [XSECTION] Cross section = 0.1915 [0.19154546188078725] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 6.8424s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8236s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.0174s for 90112 events => throughput is 1.80E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558158459897135) differ by less than 4E-4 (3.333341597855366e-06) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154546188078725) differ by less than 4E-4 (3.0685483654036005e-06) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +426,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.849666e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897416e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.858554e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.887439e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +446,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320713685871445] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.8187s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2599s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5571s for 8192 events => throughput is 1.47E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [XSECTION] Cross section = 0.7806 [0.78063796149473241] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 0.8125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5550s for 8192 events => throughput is 1.48E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320713685871445) differ by less than 4E-4 (3.390819555360025e-06) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063796149473241) differ by less than 4E-4 (4.857178601991308e-06) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +481,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558162184774774] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 7.9104s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7899s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1190s for 90112 events => throughput is 1.47E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [XSECTION] Cross section = 0.1915 [0.19154549699006573] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 7.8695s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8004s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.0673s for 90112 events => throughput is 1.49E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558162184774774) differ by less than 4E-4 (3.4984654515568536e-06) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154549699006573) differ by less than 4E-4 (3.251843682772204e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +506,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.496224e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.508161e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.504281e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.509788e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +526,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320719394836651] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7396s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6908s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0242s + [XSECTION] Cross section = 0.7806 [0.78063814953416677] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 0.7341s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6826s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.04E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cuda (0.46320719394836651) differ by less than 4E-4 (3.5140686183154912e-06) +OK! xsec from fortran (0.78063416981514677) and cuda (0.78063814953416677) differ by less than 4E-4 (5.098058954988716e-06) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +561,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558167135091578] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.4680s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1917s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2521s for 90112 events => throughput is 3.57E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0241s + [XSECTION] Cross section = 0.1915 [0.19154554638015539] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 2.4979s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2184s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2549s for 90112 events => throughput is 3.54E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cuda (0.22558167135091578) differ by less than 4E-4 (3.7179130325526444e-06) +OK! xsec from fortran (0.19154487411607687) and cuda (0.19154554638015539) differ by less than 4E-4 (3.5096949559942914e-06) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +586,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.382988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.103050e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.717142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.405868e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.139748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.104467e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.304954e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.230140e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.085623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.125382e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.300454e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.224276e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.130448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.086938e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.397157e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392029e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 27512be658..8da744076b 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -2,9 +2,9 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:54:10 +DATE: 2024-08-21_23:32:36 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 - [UNWEIGHT] Wrote 11 events (found 187 events) - [COUNTERS] PROGRAM TOTAL : 4.4700s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2619s - [COUNTERS] Fortran MEs ( 1 ) : 4.2081s for 8192 events => throughput is 1.95E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [UNWEIGHT] Wrote 7 events (found 223 events) + [COUNTERS] PROGRAM TOTAL : 4.4204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2508s + [COUNTERS] Fortran MEs ( 1 ) : 4.1697s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556621222242] fbridge_mode=0 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.4683s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s - [COUNTERS] Fortran MEs ( 1 ) : 4.2079s for 8192 events => throughput is 1.95E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 4.3959s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2472s + [COUNTERS] Fortran MEs ( 1 ) : 4.1488s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083266099815] fbridge_mode=0 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 48.3196s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8366s - [COUNTERS] Fortran MEs ( 1 ) : 46.4830s for 90112 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 47.6293s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8051s + [COUNTERS] Fortran MEs ( 1 ) : 45.8241s for 90112 events => throughput is 1.97E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556893412546] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 4.6760s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2586s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4088s for 8192 events => throughput is 1.86E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063417389679768] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 4.6218s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2544s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3588s for 8192 events => throughput is 1.88E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556893412546) differ by less than 2E-4 (5.876231279344779e-09) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417389679768) differ by less than 2E-4 (5.2286346363672465e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083370546855] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 50.5724s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8031s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.7604s for 90112 events => throughput is 1.85E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s + [XSECTION] Cross section = 0.1915 [0.19154487480444804] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 50.0743s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8184s + [COUNTERS] CudaCpp MEs ( 2 ) : 48.2472s for 90112 events => throughput is 1.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083370546855) differ by less than 2E-4 (4.630138050742971e-09) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487480444804) differ by less than 2E-4 (3.593785269373484e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.909521e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932780e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.899981e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.942839e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556780656974] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 2.5687s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2576s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3063s for 8192 events => throughput is 3.55E+03 events/s + [XSECTION] Cross section = 0.7806 [0.78063417194462525] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 2.5436s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2486s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2902s for 8192 events => throughput is 3.58E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556780656974) differ by less than 2E-4 (3.4419864736179306e-09) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417194462525) differ by less than 2E-4 (2.7278828085286477e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083390630859] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 27.4318s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7915s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.6356s for 90112 events => throughput is 3.52E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s + [XSECTION] Cross section = 0.1915 [0.19154487462148848] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 27.1149s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8064s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.3031s for 90112 events => throughput is 3.56E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083390630859) differ by less than 2E-4 (5.520462087460487e-09) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487462148848) differ by less than 2E-4 (2.63860666649407e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.646364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.637022e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.634455e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660481e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.2686s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2604s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0056s for 8192 events => throughput is 8.15E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 1.2540s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2533s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9984s for 8192 events => throughput is 8.21E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556770726795) differ by less than 2E-4 (3.2276070704995163e-09) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417138500590) differ by less than 2E-4 (2.0110049181454315e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 12.9032s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7920s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.1088s for 90112 events => throughput is 8.11E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 12.8716s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8199s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.0492s for 90112 events => throughput is 8.16E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083379720220) differ by less than 2E-4 (5.036793426782538e-09) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487463123707) differ by less than 2E-4 (2.6895012883443314e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.153831e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.087072e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.410165e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.330091e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.1480s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2607s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8850s for 8192 events => throughput is 9.26E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 1.1320s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2525s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8774s for 8192 events => throughput is 9.34E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556770726795) differ by less than 2E-4 (3.2276070704995163e-09) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417138500590) differ by less than 2E-4 (2.0110049181454315e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 11.5478s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7830s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7625s for 90112 events => throughput is 9.23E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s + [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 11.4083s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7978s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.6083s for 90112 events => throughput is 9.38E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083379720220) differ by less than 2E-4 (5.036793426782538e-09) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487463123707) differ by less than 2E-4 (2.6895012883443314e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.509937e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.748310e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.503575e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.712906e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556770726795] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 1.3881s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2592s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1259s for 8192 events => throughput is 7.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 1.3794s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2528s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1237s for 8192 events => throughput is 7.29E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cpp (0.46320556770726795) differ by less than 2E-4 (3.2276070704995163e-09) +OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417138500590) differ by less than 2E-4 (2.0110049181454315e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083379720220] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 14.4378s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7995s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.6355s for 90112 events => throughput is 7.13E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 14.1562s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8064s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3468s for 90112 events => throughput is 7.30E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cpp (0.22558083379720220) differ by less than 2E-4 (5.036793426782538e-09) +OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487463123707) differ by less than 2E-4 (2.6895012883443314e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.378664e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.143146e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.252552e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.318481e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.4632 [0.46320556665261842] fbridge_mode=1 - [UNWEIGHT] Wrote 11 events (found 168 events) - [COUNTERS] PROGRAM TOTAL : 0.7612s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6909s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0360s for 8192 events => throughput is 2.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0343s + [XSECTION] Cross section = 0.7806 [0.78063416917067197] fbridge_mode=1 + [UNWEIGHT] Wrote 10 events (found 192 events) + [COUNTERS] PROGRAM TOTAL : 0.7765s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0383s for 8192 events => throughput is 2.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.46320556621222242) and cuda (0.46320556665261842) differ by less than 2E-4 (9.507570286615419e-10) +OK! xsec from fortran (0.78063416981514677) and cuda (0.78063416917067197) differ by less than 2E-4 (8.255784944566358e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2256 [0.22558083224243403] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 294 events) - [COUNTERS] PROGRAM TOTAL : 2.5943s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1940s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3660s for 90112 events => throughput is 2.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0343s + [XSECTION] Cross section = 0.1915 [0.19154487447873400] fbridge_mode=1 + [UNWEIGHT] Wrote 27 events (found 312 events) + [COUNTERS] PROGRAM TOTAL : 2.6312s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2256s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3709s for 90112 events => throughput is 2.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.22558083266099815) and cuda (0.22558083224243403) differ by less than 2E-4 (1.855495090907766e-09) +OK! xsec from fortran (0.19154487411607687) and cuda (0.19154487447873400) differ by less than 2E-4 (1.893327272739498e-09) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.292672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.165626e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.513091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359746e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.132768e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.132493e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.151465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164869e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.134281e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.136773e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.177596e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.166507e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.130147e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120731e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.451952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.419474e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index dab5f736a0..c67135f096 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,28 +1,28 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:59:36 +DATE: 2024-08-21_23:37:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 102.0811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5116s - [COUNTERS] Fortran MEs ( 1 ) : 101.5694s for 8192 events => throughput is 8.07E+01 events/s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 96.9734s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4850s + [COUNTERS] Fortran MEs ( 1 ) : 96.4885s for 8192 events => throughput is 8.49E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 102.0739s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5163s - [COUNTERS] Fortran MEs ( 1 ) : 101.5576s for 8192 events => throughput is 8.07E+01 events/s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 97.2728s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4754s + [COUNTERS] Fortran MEs ( 1 ) : 96.7974s for 8192 events => throughput is 8.46E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1120.7697s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3745s - [COUNTERS] Fortran MEs ( 1 ) : 1116.3951s for 90112 events => throughput is 8.07E+01 events/s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 1066.0771s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3353s + [COUNTERS] Fortran MEs ( 1 ) : 1061.7418s for 90112 events => throughput is 8.49E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939193E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 122.6268s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5175s - [COUNTERS] CudaCpp MEs ( 2 ) : 121.9186s for 8192 events => throughput is 6.72E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1907s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148501E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 122.8376s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4787s + [COUNTERS] CudaCpp MEs ( 2 ) : 122.1623s for 8192 events => throughput is 6.71E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1966s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939193E-006) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148501E-006) differ by less than 3E-14 (2.6645352591003757e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1388.7153s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3988s - [COUNTERS] CudaCpp MEs ( 2 ) : 1384.1234s for 90112 events => throughput is 6.51E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1931s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130207E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 1348.3124s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2937s + [COUNTERS] CudaCpp MEs ( 2 ) : 1343.8245s for 90112 events => throughput is 6.71E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1942s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130207E-007) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.880201e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.931480e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.389775e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.346009e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939197E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 60.8180s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5182s - [COUNTERS] CudaCpp MEs ( 2 ) : 60.1993s for 8192 events => throughput is 1.36E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1005s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148497E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 59.9516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4786s + [COUNTERS] CudaCpp MEs ( 2 ) : 59.3740s for 8192 events => throughput is 1.38E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0989s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939197E-006) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148497E-006) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656017E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 663.6261s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4076s - [COUNTERS] CudaCpp MEs ( 2 ) : 659.1171s for 90112 events => throughput is 1.37E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1014s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130209E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 658.6475s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2991s + [COUNTERS] CudaCpp MEs ( 2 ) : 654.2486s for 90112 events => throughput is 1.38E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0998s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656017E-007) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130209E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.603881e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.625941e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.607115e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.636342e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 28.7968s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5160s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.2344s for 8192 events => throughput is 2.90E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0464s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 28.4047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4789s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.8793s for 8192 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0466s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148499E-006) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 314.6312s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4324s - [COUNTERS] CudaCpp MEs ( 2 ) : 310.1525s for 90112 events => throughput is 2.91E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0464s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 311.3726s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3146s + [COUNTERS] CudaCpp MEs ( 2 ) : 307.0111s for 90112 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0468s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130220E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.378917e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.513912e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.496128e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.526038e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.3254s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5203s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.7644s for 8192 events => throughput is 3.31E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0408s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 25.0727s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4715s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5612s for 8192 events => throughput is 3.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0400s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148499E-006) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 277.9808s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4083s - [COUNTERS] CudaCpp MEs ( 2 ) : 273.5305s for 90112 events => throughput is 3.29E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0420s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 277.1350s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2964s + [COUNTERS] CudaCpp MEs ( 2 ) : 272.7990s for 90112 events => throughput is 3.30E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130220E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.986386e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.072286e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.006448e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.046896e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939191E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.0869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5172s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.5238s for 8192 events => throughput is 3.34E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0459s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 24.4560s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4728s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.9381s for 8192 events => throughput is 3.42E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0450s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985227939191E-006) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148499E-006) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656014E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 271.0840s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3948s - [COUNTERS] CudaCpp MEs ( 2 ) : 266.6404s for 90112 events => throughput is 3.38E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0489s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 269.6330s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3142s + [COUNTERS] CudaCpp MEs ( 2 ) : 265.2762s for 90112 events => throughput is 3.40E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0426s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993086656014E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130220E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.641160e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656977e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.622116e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.689669e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939195E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 3.2426s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0583s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0970s for 8192 events => throughput is 7.47E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0873s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148501E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 3.1617s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9774s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1002s for 8192 events => throughput is 7.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0841s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985227939195E-006) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (1.2009575548148470E-006) and cuda (1.2009575548148501E-006) differ by less than 3E-14 (2.6645352591003757e-15) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086656006E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 17.9203s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9107s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9249s for 90112 events => throughput is 7.56E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0847s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130222E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 17.8388s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8039s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9436s for 90112 events => throughput is 7.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0913s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993086656006E-007) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (2.2641741825130175E-007) and cuda (2.2641741825130222E-007) differ by less than 3E-14 (1.9984014443252818e-15) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.521131e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.499559e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.292650e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.234006e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.241733e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.266182e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.585186e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.552602e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.235154e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.275473e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.473644e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.446988e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.236111e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.262534e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.235762e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.240430e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 4ffdbee10a..f74c72034a 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_22:23:03 +DATE: 2024-08-22_00:59:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.3873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5075s - [COUNTERS] Fortran MEs ( 1 ) : 100.8798s for 8192 events => throughput is 8.12E+01 events/s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 97.0139s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4830s + [COUNTERS] Fortran MEs ( 1 ) : 96.5309s for 8192 events => throughput is 8.49E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 102.2416s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5135s - [COUNTERS] Fortran MEs ( 1 ) : 101.7281s for 8192 events => throughput is 8.05E+01 events/s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 96.7363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4803s + [COUNTERS] Fortran MEs ( 1 ) : 96.2560s for 8192 events => throughput is 8.51E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1114.7300s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3428s - [COUNTERS] Fortran MEs ( 1 ) : 1110.3872s for 90112 events => throughput is 8.12E+01 events/s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 1064.7258s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3425s + [COUNTERS] Fortran MEs ( 1 ) : 1060.3833s for 90112 events => throughput is 8.50E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,22 +126,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719945779552E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 111.0089s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5100s - [COUNTERS] CudaCpp MEs ( 2 ) : 110.3187s for 8192 events => throughput is 7.43E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1802s + [XSECTION] Cross section = 1.201e-06 [1.2011250641073541E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 112.6859s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4791s + [COUNTERS] CudaCpp MEs ( 2 ) : 112.0199s for 8192 events => throughput is 7.31E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1869s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719945779552E-006) differ by less than 4E-4 (0.00013985165319851944) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011250641073541E-006) differ by less than 4E-4 (0.000139479777478968) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -161,22 +162,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326290777570335E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1216.8479s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4035s - [COUNTERS] CudaCpp MEs ( 2 ) : 1212.2644s for 90112 events => throughput is 7.43E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1800s + [XSECTION] Cross section = 2.264e-07 [2.2644964998437398E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 1244.7020s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3392s + [COUNTERS] CudaCpp MEs ( 2 ) : 1240.1810s for 90112 events => throughput is 7.27E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1818s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326290777570335E-007) differ by less than 4E-4 (0.00014139226908471692) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644964998437398E-007) differ by less than 4E-4 (0.0001423553599417815) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +187,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.795452e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.855192e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.783118e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.844824e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -206,22 +208,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405716994349971E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 27.4750s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5164s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.9120s for 8192 events => throughput is 3.04E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0465s + [XSECTION] Cross section = 1.201e-06 [1.2011248466338516E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 27.2286s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4733s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.7097s for 8192 events => throughput is 3.07E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0457s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716994349971E-006) differ by less than 4E-4 (0.00013961371115600585) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011248466338516E-006) differ by less than 4E-4 (0.00013929869405782114) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,22 +244,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326284885505778E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 300.8248s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4082s - [COUNTERS] CudaCpp MEs ( 2 ) : 296.3700s for 90112 events => throughput is 3.04E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0466s + [XSECTION] Cross section = 2.264e-07 [2.2644960006557758E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 298.1598s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3168s + [COUNTERS] CudaCpp MEs ( 2 ) : 293.7965s for 90112 events => throughput is 3.07E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0465s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326284885505778E-007) differ by less than 4E-4 (0.0001411396400787801) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644960006557758E-007) differ by less than 4E-4 (0.00014213488752057302) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +269,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.485944e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.511919e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.470723e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.497649e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -286,22 +290,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 14.5936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5183s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.0522s for 8192 events => throughput is 5.83E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0231s + [XSECTION] Cross section = 1.201e-06 [1.2011248461798598E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 14.5108s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4752s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.0129s for 8192 events => throughput is 5.85E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0227s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716646933743E-006) differ by less than 4E-4 (0.00013958570271999093) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011248461798598E-006) differ by less than 4E-4 (0.0001392983160330985) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -321,22 +326,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 158.5014s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4348s - [COUNTERS] CudaCpp MEs ( 2 ) : 154.0430s for 90112 events => throughput is 5.85E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0236s + [XSECTION] Cross section = 2.264e-07 [2.2644951222621394E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 158.5681s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3155s + [COUNTERS] CudaCpp MEs ( 2 ) : 154.2287s for 90112 events => throughput is 5.84E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0239s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277033163402E-007) differ by less than 4E-4 (0.00014080296191987252) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644951222621394E-007) differ by less than 4E-4 (0.000141746934313014) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +351,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.991558e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.099740e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.952358e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.119533e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -366,22 +372,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405716646933743E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 12.8606s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5199s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3203s for 8192 events => throughput is 6.65E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0204s + [XSECTION] Cross section = 1.201e-06 [1.2011248461798598E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 12.8834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4713s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3915s for 8192 events => throughput is 6.61E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0207s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405716646933743E-006) differ by less than 4E-4 (0.00013958570271999093) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011248461798598E-006) differ by less than 4E-4 (0.0001392983160330985) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -401,22 +408,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326277033163402E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 139.5398s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3981s - [COUNTERS] CudaCpp MEs ( 2 ) : 135.1212s for 90112 events => throughput is 6.67E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0205s + [XSECTION] Cross section = 2.264e-07 [2.2644951222621394E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 139.2471s + [COUNTERS] Fortran Overhead ( 0 ) : 4.2922s + [COUNTERS] CudaCpp MEs ( 2 ) : 134.9348s for 90112 events => throughput is 6.68E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0201s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326277033163402E-007) differ by less than 4E-4 (0.00014080296191987252) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644951222621394E-007) differ by less than 4E-4 (0.000141746934313014) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +433,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.890802e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.081127e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.069181e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.069440e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -446,22 +454,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405719257109645E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 12.8130s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5166s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.2739s for 8192 events => throughput is 6.67E+02 events/s + [XSECTION] Cross section = 1.201e-06 [1.2011251360912330E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 12.3768s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4770s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8774s for 8192 events => throughput is 6.90E+02 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0225s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2405719257109645E-006) differ by less than 4E-4 (0.00013979613314640815) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011251360912330E-006) differ by less than 4E-4 (0.00013953971621583072) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -481,22 +490,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326283665697276E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 139.5916s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4260s - [COUNTERS] CudaCpp MEs ( 2 ) : 135.1428s for 90112 events => throughput is 6.67E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0228s + [XSECTION] Cross section = 2.264e-07 [2.2644957106171463E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 136.4090s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3035s + [COUNTERS] CudaCpp MEs ( 2 ) : 132.0816s for 90112 events => throughput is 6.82E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0239s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3326283665697276E-007) differ by less than 4E-4 (0.00014108733939433016) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644957106171463E-007) differ by less than 4E-4 (0.00014200678844056291) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +515,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.223008e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.281845e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.135239e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.373062e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -525,22 +535,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.241e-06 [1.2405721007137020E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.1089s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0215s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5332s for 8192 events => throughput is 1.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5542s + [XSECTION] Cross section = 1.201e-06 [1.2011257191623754E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.0557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9634s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5338s for 8192 events => throughput is 1.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5585s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2405721007137020E-006) differ by less than 4E-4 (0.00013993721904270728) +OK! xsec from fortran (1.2009575548148470E-006) and cuda (1.2011257191623754E-006) differ by less than 4E-4 (0.00014002522141964846) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -559,22 +570,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.333e-07 [2.3326295421688232E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 11.2844s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8851s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8421s for 90112 events => throughput is 1.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5572s + [XSECTION] Cross section = 2.264e-07 [2.2644969729873264E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 11.1748s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7805s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.8427s for 90112 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5517s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3326295421688232E-007) differ by less than 4E-4 (0.00014159139095037965) +OK! xsec from fortran (2.2641741825130175E-007) and cuda (2.2644969729873264E-007) differ by less than 4E-4 (0.0001425643295476231) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -583,42 +595,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.533878e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.541524e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.547825e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.549292e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.147653e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.099302e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.124611e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.212509e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.134315e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.136958e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.131039e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.131463e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.139642e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113807e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.021489e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.956391e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index e8248fddca..cd2b108cad 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,14 +1,15 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' @@ -16,7 +17,6 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_23:26:17 +DATE: 2024-08-22_02:01:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 103.0122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5139s - [COUNTERS] Fortran MEs ( 1 ) : 102.4983s for 8192 events => throughput is 7.99E+01 events/s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 97.1231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4738s + [COUNTERS] Fortran MEs ( 1 ) : 96.6493s for 8192 events => throughput is 8.48E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985227939174E-006] fbridge_mode=0 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 101.2993s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5294s - [COUNTERS] Fortran MEs ( 1 ) : 100.7699s for 8192 events => throughput is 8.13E+01 events/s + [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 97.0792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4770s + [COUNTERS] Fortran MEs ( 1 ) : 96.6022s for 8192 events => throughput is 8.48E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993086655967E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1118.7642s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3619s - [COUNTERS] Fortran MEs ( 1 ) : 1114.4022s for 90112 events => throughput is 8.09E+01 events/s + [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 1068.1865s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3396s + [COUNTERS] Fortran MEs ( 1 ) : 1063.8469s for 90112 events => throughput is 8.47E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985299359844E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 125.7885s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5193s - [COUNTERS] CudaCpp MEs ( 2 ) : 125.0621s for 8192 events => throughput is 6.55E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2071s + [XSECTION] Cross section = 1.201e-06 [1.2009575613215040E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 122.0617s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4956s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.3618s for 8192 events => throughput is 6.75E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2043s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985299359844E-006) differ by less than 2E-4 (5.7578810608305275e-09) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575613215040E-006) differ by less than 2E-4 (5.417890802661418e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993212353001E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 1322.8827s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3903s - [COUNTERS] CudaCpp MEs ( 2 ) : 1318.2870s for 90112 events => throughput is 6.84E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2054s + [XSECTION] Cross section = 2.264e-07 [2.2641741947481977E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 1300.3896s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3107s + [COUNTERS] CudaCpp MEs ( 2 ) : 1295.8740s for 90112 events => throughput is 6.95E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2049s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993212353001E-007) differ by less than 2E-4 (5.389404034161771e-09) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741947481977E-007) differ by less than 2E-4 (5.40381583924443e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.761597e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.809869e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.724704e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.873331e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985295828471E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 62.4510s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5155s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.8333s for 8192 events => throughput is 1.32E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1022s + [XSECTION] Cross section = 1.201e-06 [1.2009575624556593E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 61.0754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4769s + [COUNTERS] CudaCpp MEs ( 2 ) : 60.4998s for 8192 events => throughput is 1.35E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0988s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985295828471E-006) differ by less than 2E-4 (5.473184350179849e-09) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575624556593E-006) differ by less than 2E-4 (6.3622667134666244e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222645653E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 684.8121s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4198s - [COUNTERS] CudaCpp MEs ( 2 ) : 680.2921s for 90112 events => throughput is 1.32E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1003s + [XSECTION] Cross section = 2.264e-07 [2.2641741950090396E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 673.9577s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3272s + [COUNTERS] CudaCpp MEs ( 2 ) : 669.5308s for 90112 events => throughput is 1.35E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0997s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222645653E-007) differ by less than 2E-4 (5.830713245558172e-09) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741950090396E-007) differ by less than 2E-4 (5.519019907751499e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.589042e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.597786e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.588931e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.593442e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 27.0092s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5181s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4459s for 8192 events => throughput is 3.10E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0452s + [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 27.1778s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4735s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.6599s for 8192 events => throughput is 3.07E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0444s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575626927521E-006) differ by less than 2E-4 (6.55968657170547e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 298.0409s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4173s - [COUNTERS] CudaCpp MEs ( 2 ) : 293.5790s for 90112 events => throughput is 3.07E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0445s + [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 296.7376s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3098s + [COUNTERS] CudaCpp MEs ( 2 ) : 292.3842s for 90112 events => throughput is 3.08E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741946798811E-007) differ by less than 2E-4 (5.3736428640149825e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.648206e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679158e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.625373e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.695318e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 24.3540s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5168s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.7936s for 8192 events => throughput is 3.44E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s + [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 23.7526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4784s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.2363s for 8192 events => throughput is 3.53E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0379s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575626927521E-006) differ by less than 2E-4 (6.55968657170547e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 269.6777s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4164s - [COUNTERS] CudaCpp MEs ( 2 ) : 265.2234s for 90112 events => throughput is 3.40E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0378s + [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 261.7863s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3209s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.4280s for 90112 events => throughput is 3.50E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0375s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741946798811E-007) differ by less than 2E-4 (5.3736428640149825e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.285493e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.307001e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.289545e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.317389e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985293629285E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 25.1227s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5145s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.5642s for 8192 events => throughput is 3.33E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0441s + [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 24.7488s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4758s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.2293s for 8192 events => throughput is 3.38E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cpp (1.2403985293629285E-006) differ by less than 2E-4 (5.29588750630694e-09) +OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575626927521E-006) differ by less than 2E-4 (6.55968657170547e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993222447204E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 274.1583s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4200s - [COUNTERS] CudaCpp MEs ( 2 ) : 269.6946s for 90112 events => throughput is 3.34E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0436s + [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 268.0088s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3419s + [COUNTERS] CudaCpp MEs ( 2 ) : 263.6236s for 90112 events => throughput is 3.42E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0433s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cpp (2.3322993222447204E-007) differ by less than 2E-4 (5.822204496297445e-09) +OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741946798811E-007) differ by less than 2E-4 (5.3736428640149825e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.625912e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.743317e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.662510e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.691916e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.24e-06 [1.2403985217419736E-006] fbridge_mode=1 - [UNWEIGHT] Wrote 70 events (found 407 events) - [COUNTERS] PROGRAM TOTAL : 2.7717s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0261s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8763s for 8192 events => throughput is 9.35E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8694s + [XSECTION] Cross section = 1.201e-06 [1.2009575531257951E-006] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.7248s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9716s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8776s for 8192 events => throughput is 9.33E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8755s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2403985227939174E-006) and cuda (1.2403985217419736E-006) differ by less than 2E-4 (8.480691704448873e-10) +OK! xsec from fortran (1.2009575548148470E-006) and cuda (1.2009575531257951E-006) differ by less than 2E-4 (1.4064209796771365e-09) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.332e-07 [2.3322993078576733E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 303 events (found 1531 events) - [COUNTERS] PROGRAM TOTAL : 15.2659s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8943s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5013s for 90112 events => throughput is 9.48E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8704s + [XSECTION] Cross section = 2.264e-07 [2.2641741822510739E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 418 events (found 1570 events) + [COUNTERS] PROGRAM TOTAL : 15.1519s + [COUNTERS] Fortran Overhead ( 0 ) : 4.7832s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.4907s for 90112 events => throughput is 9.49E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8780s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3322993086655967E-007) and cuda (2.3322993078576733E-007) differ by less than 2E-4 (3.464063480507207e-10) +OK! xsec from fortran (2.2641741825130175E-007) and cuda (2.2641741822510739E-007) differ by less than 2E-4 (1.1569056823645951e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.434661e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.438154e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.089765e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080719e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.112116e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105596e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.160890e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.153192e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108390e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110229e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111312e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.110080e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109990e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109639e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.638783e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.675260e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index b877c26fea..020a3ed712 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:58:09 +DATE: 2024-08-21_23:36:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 - [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4051s - [COUNTERS] Fortran MEs ( 1 ) : 0.0703s for 8192 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [UNWEIGHT] Wrote 506 events (found 1943 events) + [COUNTERS] PROGRAM TOTAL : 0.4726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4036s + [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4153s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s - [COUNTERS] Fortran MEs ( 1 ) : 0.0708s for 8192 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.4034s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3336s + [COUNTERS] Fortran MEs ( 1 ) : 0.0699s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3303s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5573s - [COUNTERS] Fortran MEs ( 1 ) : 0.7730s for 90112 events => throughput is 1.17E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.2186s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4557s + [COUNTERS] Fortran MEs ( 1 ) : 0.7629s for 90112 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263335] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4189s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3418s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0764s for 8192 events => throughput is 1.07E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520600993] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.4086s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0744s for 8192 events => throughput is 1.10E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263335) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520600993) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3766s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8384s for 90112 events => throughput is 1.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.3251s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4975s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8266s for 90112 events => throughput is 1.09E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293388) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.104999e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.102431e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.080050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.102848e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351262541] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3875s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3450s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0419s for 8192 events => throughput is 1.96E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520601043] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3281s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351262541) differ by less than 3E-14 (2.90878432451791e-14) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601043) differ by less than 3E-14 (1.9984014443252818e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561281] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.0024s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5394s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4624s for 90112 events => throughput is 1.95E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382983293380] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.9467s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4915s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4546s for 90112 events => throughput is 1.98E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561281) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293380) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.937885e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.005465e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.972484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989351e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3673s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3427s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3542s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3300s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0235s for 8192 events => throughput is 3.48E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601038) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8108s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5445s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2657s for 90112 events => throughput is 3.39E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.7567s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4944s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2617s for 90112 events => throughput is 3.44E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.384861e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.353079e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.378583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328584e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3684s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3456s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.69E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3309s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0212s for 8192 events => throughput is 3.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601038) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7798s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2375s for 90112 events => throughput is 3.79E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.7299s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2372s for 90112 events => throughput is 3.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.465878e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.674323e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.626688e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.785001e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263341] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3477s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0325s for 8192 events => throughput is 2.52E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3677s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3347s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539351263341) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601038) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8986s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3549s for 90112 events => throughput is 2.54E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.8509s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5019s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3483s for 90112 events => throughput is 2.59E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686556561295) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.412835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.506958e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.491870e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.508939e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263363] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7685s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.03E+07 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520601049] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.7681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7638s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539351263363) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (0.27343385520600988) and cuda (0.27343385520601049) differ by less than 3E-14 (2.220446049250313e-15) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561304] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9737s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9648s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s + [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.9383s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9265s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.47E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686556561304) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.21182382983293388) and cuda (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.555983e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.077452e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.037158e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.475646e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.629928e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.336316e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.566255e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.176205e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.636845e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.337529e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.850724e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.372428e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.619360e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.338223e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.790736e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.653233e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 8ac388b886..81fbed4576 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -4,8 +4,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/g make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,17 +13,17 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:58:38 +DATE: 2024-08-21_23:37:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 - [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4756s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4044s - [COUNTERS] Fortran MEs ( 1 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [UNWEIGHT] Wrote 506 events (found 1943 events) + [COUNTERS] PROGRAM TOTAL : 0.4687s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4000s + [COUNTERS] Fortran MEs ( 1 ) : 0.0687s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4108s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s - [COUNTERS] Fortran MEs ( 1 ) : 0.0688s for 8192 events => throughput is 1.19E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3970s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3265s + [COUNTERS] Fortran MEs ( 1 ) : 0.0706s for 8192 events => throughput is 1.16E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3245s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5525s - [COUNTERS] Fortran MEs ( 1 ) : 0.7719s for 90112 events => throughput is 1.17E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.2308s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4622s + [COUNTERS] Fortran MEs ( 1 ) : 0.7687s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110463158198617] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4137s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0712s for 8192 events => throughput is 1.15E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343387711996092] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.4011s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3302s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0703s for 8192 events => throughput is 1.17E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110463158198617) differ by less than 4E-4 (2.8104591991429118e-06) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343387711996092) differ by less than 4E-4 (8.014351782215101e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686347932190] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3233s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7851s for 90112 events => throughput is 1.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.2118 [0.21182383177444153] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.2786s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7768s for 90112 events => throughput is 1.16E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686347932190) differ by less than 4E-4 (9.698858494111562e-09) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182383177444153) differ by less than 4E-4 (9.165671555066979e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.154270e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182628e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.117776e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.135680e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110459183868807] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3703s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3439s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.2734 [0.27343383490650730] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3554s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3288s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110459183868807) differ by less than 4E-4 (2.9570564231695684e-06) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343383490650730) differ by less than 4E-4 (7.423917047777451e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510683073685827] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8197s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5348s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2844s for 90112 events => throughput is 3.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.2118 [0.21182379928139489] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.7825s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5002s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2818s for 90112 events => throughput is 3.20E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510683073685827) differ by less than 4E-4 (1.6191372875784538e-07) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182379928139489) differ by less than 4E-4 (1.4423088756654323e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.998738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.086516e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.994620e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.150660e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3581s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.29E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343382982076725] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3464s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3332s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0127s for 8192 events => throughput is 6.43E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460727141733) differ by less than 4E-4 (2.9001312211729413e-06) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343382982076725) differ by less than 4E-4 (9.283869628617936e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6873s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5442s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1427s for 90112 events => throughput is 6.31E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182379586387554] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.6359s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4958s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1396s for 90112 events => throughput is 6.46E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682516942223) differ by less than 4E-4 (1.8779591537398943e-07) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182379586387554) differ by less than 4E-4 (1.6036466898849966e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.110364e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.062674e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.231132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.222354e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110460727141733] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3551s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3423s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0124s for 8192 events => throughput is 6.61E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343382982076725] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3437s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3306s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0126s for 8192 events => throughput is 6.48E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110460727141733) differ by less than 4E-4 (2.9001312211729413e-06) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343382982076725) differ by less than 4E-4 (9.283869628617936e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510682516942223] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.6706s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5390s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1312s for 90112 events => throughput is 6.87E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182379586387554] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.6264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4967s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1293s for 90112 events => throughput is 6.97E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510682516942223) differ by less than 4E-4 (1.8779591537398943e-07) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182379586387554) differ by less than 4E-4 (1.6036466898849966e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.737889e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.684833e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.863785e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.821989e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110464220032526] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3592s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0167s for 8192 events => throughput is 4.91E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343386589929475] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3490s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110464220032526) differ by less than 4E-4 (2.771292368253242e-06) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343386589929475) differ by less than 4E-4 (3.9107391769377386e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510685471570221] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7199s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1795s for 90112 events => throughput is 5.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.2118 [0.21182382325497387] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.6876s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1772s for 90112 events => throughput is 5.09E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510685471570221) differ by less than 4E-4 (5.043963013928732e-08) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382325497387) differ by less than 4E-4 (3.105391876978558e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.872478e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.028392e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.938459e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.834126e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110477321990667] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) + [XSECTION] Cross section = 0.2734 [0.27343391019497171] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) [COUNTERS] PROGRAM TOTAL : 0.7679s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.31E+07 events/s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7639s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110477321990667) differ by less than 4E-4 (2.2880132283242816e-06) +OK! xsec from fortran (0.27343385520600988) and cuda (0.27343391019497171) differ by less than 4E-4 (2.011051696282351e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510689318513457] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9690s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0063s for 90112 events => throughput is 1.43E+07 events/s + [XSECTION] Cross section = 0.2118 [0.21182386711435958] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.9377s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9277s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 90112 events => throughput is 9.95E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510689318513457) differ by less than 4E-4 (1.2839907048700638e-07) +OK! xsec from fortran (0.21182382983293388) and cuda (0.21182386711435958) differ by less than 4E-4 (1.7600203783274537e-07) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.567743e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.313904e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.424411e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406775e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.006580e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.153018e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.460162e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.636283e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.113271e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.128642e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.506902e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.757313e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.545880e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.740120e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.393633e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.187604e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 25661e1063..74ef61bb65 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-08_20:59:06 +DATE: 2024-08-21_23:37:29 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 - [UNWEIGHT] Wrote 404 events (found 1817 events) - [COUNTERS] PROGRAM TOTAL : 0.4768s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4060s - [COUNTERS] Fortran MEs ( 1 ) : 0.0709s for 8192 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [UNWEIGHT] Wrote 506 events (found 1943 events) + [COUNTERS] PROGRAM TOTAL : 0.4711s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4021s + [COUNTERS] Fortran MEs ( 1 ) : 0.0690s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539351263330] fbridge_mode=0 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4179s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3473s - [COUNTERS] Fortran MEs ( 1 ) : 0.0706s for 8192 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.4011s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3314s + [COUNTERS] Fortran MEs ( 1 ) : 0.0697s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686556561295] fbridge_mode=0 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3258s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5517s - [COUNTERS] Fortran MEs ( 1 ) : 0.7741s for 90112 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.2267s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4591s + [COUNTERS] Fortran MEs ( 1 ) : 0.7675s for 90112 events => throughput is 1.17E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539350666329] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.4207s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0763s for 8192 events => throughput is 1.07E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385506612239] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.4049s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3282s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0759s for 8192 events => throughput is 1.08E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539350666329) differ by less than 2E-4 (2.2020940626532592e-11) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385506612239) differ by less than 2E-4 (5.115953216616731e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686560103207] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 2.3663s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5373s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8282s for 90112 events => throughput is 1.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.2118 [0.21182382982924081] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.3238s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5018s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8213s for 90112 events => throughput is 1.10E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560103207) differ by less than 2E-4 (1.646582870051816e-10) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382982924081) differ by less than 2E-4 (1.743460931180607e-11) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.091070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.101776e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.097593e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.101637e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539350666335] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3890s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3472s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385506612239] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3322s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539350666335) differ by less than 2E-4 (2.2020718581927667e-11) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385506612239) differ by less than 2E-4 (5.115953216616731e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686560103204] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9944s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5398s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4540s for 90112 events => throughput is 1.98E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382982924075] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.9560s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5070s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4484s for 90112 events => throughput is 2.01E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686560103204) differ by less than 2E-4 (1.6465806496057667e-10) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382982924075) differ by less than 2E-4 (1.7434942378713458e-11) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.922053e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989014e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.990970e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.964705e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3492s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.46E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3541s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3303s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0232s for 8192 events => throughput is 3.53E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330887440) differ by less than 2E-4 (7.515855715567454e-10) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385527282038) differ by less than 2E-4 (2.4433877143792415e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.8003s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5375s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2622s for 90112 events => throughput is 3.44E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.7602s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4992s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2604s for 90112 events => throughput is 3.46E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686557693198) differ by less than 2E-4 (5.262057456434377e-11) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382979024772) differ by less than 2E-4 (2.0151724733352694e-10) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.424784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397345e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.455227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423227e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3680s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3511s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3298s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.94E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330887440) differ by less than 2E-4 (7.515855715567454e-10) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385527282038) differ by less than 2E-4 (2.4433877143792415e-10) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.7822s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5448s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2369s for 90112 events => throughput is 3.80E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.7394s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5095s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2294s for 90112 events => throughput is 3.93E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686557693198) differ by less than 2E-4 (5.262057456434377e-11) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382979024772) differ by less than 2E-4 (2.0151724733352694e-10) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.843024e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.800485e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.890496e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.827388e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539330887440] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.3872s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0362s for 8192 events => throughput is 2.26E+05 events/s + [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.3626s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3297s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.54E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cpp (0.27110539330887440) differ by less than 2E-4 (7.515855715567454e-10) +OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385527282038) differ by less than 2E-4 (2.4433877143792415e-10) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686557693198] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5452s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3689s for 90112 events => throughput is 2.44E+05 events/s + [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.8592s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4977s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3608s for 90112 events => throughput is 2.50E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cpp (0.21510686557693198) differ by less than 2E-4 (5.262057456434377e-11) +OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382979024772) differ by less than 2E-4 (2.0151724733352694e-10) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.300565e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.391597e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.415614e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.393779e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2711 [0.27110539343558537] fbridge_mode=1 - [UNWEIGHT] Wrote 404 events (found 1228 events) - [COUNTERS] PROGRAM TOTAL : 0.7684s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7665s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0008s for 8192 events => throughput is 1.09E+07 events/s + [XSECTION] Cross section = 0.2734 [0.27343385529180364] fbridge_mode=1 + [UNWEIGHT] Wrote 491 events (found 1236 events) + [COUNTERS] PROGRAM TOTAL : 0.7639s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7597s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27110539351263330) and cuda (0.27110539343558537) differ by less than 2E-4 (2.8419910869104115e-10) +OK! xsec from fortran (0.27343385520600988) and cuda (0.27343385529180364) differ by less than 2E-4 (3.1376412579220414e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2151 [0.21510686553631395] fbridge_mode=1 - [UNWEIGHT] Wrote 1939 events (found 1944 events) - [COUNTERS] PROGRAM TOTAL : 1.9688s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9599s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s + [XSECTION] Cross section = 0.2118 [0.21182382978588427] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 1.9462s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.49E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21510686556561295) and cuda (0.21510686553631395) differ by less than 2E-4 (1.3620671257541517e-10) +OK! xsec from fortran (0.21182382983293388) and cuda (0.21182382978588427) differ by less than 2E-4 (2.2211676942163194e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.565914e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.943095e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.104681e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489937e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.636309e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.342569e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.555697e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.224982e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.642280e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.317376e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.824016e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.362791e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.612307e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.326289e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.778614e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657342e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 9204db3db0..537d9ab035 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/h make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:48:38 +DATE: 2024-08-22_03:23:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 - [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9141s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8671s - [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [UNWEIGHT] Wrote 3371 events (found 6399 events) + [COUNTERS] PROGRAM TOTAL : 0.9347s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8882s + [COUNTERS] Fortran MEs ( 1 ) : 0.0465s for 8192 events => throughput is 1.76E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4185s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3716s - [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4335s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3843s + [COUNTERS] Fortran MEs ( 1 ) : 0.0493s for 8192 events => throughput is 1.66E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7982s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2863s - [COUNTERS] Fortran MEs ( 1 ) : 0.5119s for 90112 events => throughput is 1.76E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.8992s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3586s + [COUNTERS] Fortran MEs ( 1 ) : 0.5406s for 90112 events => throughput is 1.67E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256148] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0500s for 8192 events => throughput is 1.64E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4145s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3638s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0502s for 8192 events => throughput is 1.63E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256148) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755170) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8165s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2690s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5470s for 90112 events => throughput is 1.65E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.8095s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2687s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5403s for 90112 events => throughput is 1.67E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187286) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.683813e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.704082e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.668738e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.693622e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4071s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3797s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3915s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3639s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 8192 events => throughput is 3.02E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755183) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377564] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5672s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2957s for 90112 events => throughput is 3.05E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.5668s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2752s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2912s for 90112 events => throughput is 3.09E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377564) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187286) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.037815e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.077969e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.993910e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.093492e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 5.00E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3917s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0165s for 8192 events => throughput is 4.96E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256232) differ by less than 3E-14 (3.552713678800501e-15) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4641s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1835s for 90112 events => throughput is 4.91E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187521] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.4548s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2746s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1798s for 90112 events => throughput is 5.01E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377489) differ by less than 3E-14 (3.885780586188048e-15) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187521) differ by less than 3E-14 (1.1546319456101628e-14) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.902798e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.934832e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.886099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.004753e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256232] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3876s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3651s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.47E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256232) differ by less than 3E-14 (3.552713678800501e-15) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377489] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4216s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2567s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1645s for 90112 events => throughput is 5.48E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187521] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.4376s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1628s for 90112 events => throughput is 5.54E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377489) differ by less than 3E-14 (3.885780586188048e-15) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187521) differ by less than 3E-14 (1.1546319456101628e-14) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.361206e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.562123e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.494947e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.438518e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256152] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3733s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.68E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3886s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955499256152) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755179) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377560] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5023s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2627s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2391s for 90112 events => throughput is 3.77E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187290] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.5255s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2824s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2426s for 90112 events => throughput is 3.71E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895240377560) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187290) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615246e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508019e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.662708e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.599772e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256165] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.8032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7995s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.84E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955499256165) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081479755192) differ by less than 3E-14 (4.440892098500626e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377573] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7013s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6935s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0069s for 90112 events => throughput is 1.30E+07 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187299] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.7264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7162s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 90112 events => throughput is 9.55E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895240377573) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0342240372187286) and cuda (2.0342240372187299) differ by less than 3E-14 (6.661338147750939e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.844829e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.095997e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.285195e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.504453e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.255268e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.855286e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.760215e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.278079e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.235451e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.878989e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.038893e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.591936e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.241445e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.889184e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.725782e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.521297e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index ae36851550..042dc1d8e8 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:49:04 +DATE: 2024-08-22_03:24:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 - [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9394s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8922s - [COUNTERS] Fortran MEs ( 1 ) : 0.0473s for 8192 events => throughput is 1.73E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [UNWEIGHT] Wrote 3371 events (found 6399 events) + [COUNTERS] PROGRAM TOTAL : 0.9477s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8990s + [COUNTERS] Fortran MEs ( 1 ) : 0.0486s for 8192 events => throughput is 1.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4203s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3728s - [COUNTERS] Fortran MEs ( 1 ) : 0.0475s for 8192 events => throughput is 1.72E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4149s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3680s + [COUNTERS] Fortran MEs ( 1 ) : 0.0469s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.7988s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2854s - [COUNTERS] Fortran MEs ( 1 ) : 0.5133s for 90112 events => throughput is 1.76E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.8101s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2948s + [COUNTERS] Fortran MEs ( 1 ) : 0.5153s for 90112 events => throughput is 1.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,43 +125,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162897355760356] fbridge_mode=1 - [UNWEIGHT] Wrote 1620 events (found 1625 events) - [COUNTERS] PROGRAM TOTAL : 0.4180s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3713s + [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 + [UNWEIGHT] Wrote 1653 events (found 1658 events) + [COUNTERS] PROGRAM TOTAL : 0.4156s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3689s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162897355760356) differ by less than 4E-4 (2.8836792208553064e-06) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ by less than 4E-4 (1.6138103811513815e-05) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 -6206,6207c6206,6207 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. 1. ---- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.59936081260E+01 0.59936081260E+01 0.00000000000E+00 0. 1. -> 5 1 1 2 501 0 0.45273385612E+02 -0.31131305296E+02 0.47763304676E+03 0.48080583916E+03 0.47000000000E+01 0. -1. -8306,8307c8306,8307 -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. -1. ---- -> 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.23857997239E+02 0.23857997239E+02 0.00000000000E+00 0. -1. -> 5 1 1 2 501 0 -0.34843521722E+02 0.35239303629E+02 0.13219496682E+02 0.51504607743E+02 0.47000000000E+01 0. 1. -9606,9619d9605 -< 4 1 1E-03 0.1250139E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.94948250004E+03 0.94948250004E+03 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.41149990002E+01 0.41149990002E+01 0.00000000000E+00 0. -1. -< 5 1 1 2 501 0 -0.96459450317E+01 -0.34409175043E+02 0.83136584965E+02 0.90613560477E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 0.96459450317E+01 0.34409175043E+02 0.86223091608E+03 0.86298393857E+03 0.47000000000E+01 0. 1. +7562,7575d7561 +< 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239790E+02 0.54771239790E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002893E+02 0.63925016162E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762567893E+01 0.62166723101E+02 0.47000000000E+01 0. -1. < -< 0 0.12501391E+03 +< 0 0.12500099E+03 +< 0 +< 1 21 0.10972385E-01 0.12500099E+03 +< 1 21 0.84263445E-02 0.12500099E+03 +< 0.73891524E+06 +< +< +< diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index d90f539fcf..1aeb1a3188 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:49:10 +DATE: 2024-08-22_03:24:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 - [UNWEIGHT] Wrote 3321 events (found 6423 events) - [COUNTERS] PROGRAM TOTAL : 0.9158s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8684s - [COUNTERS] Fortran MEs ( 1 ) : 0.0474s for 8192 events => throughput is 1.73E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [UNWEIGHT] Wrote 3371 events (found 6399 events) + [COUNTERS] PROGRAM TOTAL : 0.8970s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8506s + [COUNTERS] Fortran MEs ( 1 ) : 0.0464s for 8192 events => throughput is 1.77E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955499256161] fbridge_mode=0 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4209s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3739s - [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4114s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3656s + [COUNTERS] Fortran MEs ( 1 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895240377569] fbridge_mode=0 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8008s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2889s - [COUNTERS] Fortran MEs ( 1 ) : 0.5118s for 90112 events => throughput is 1.76E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.7806s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2737s + [COUNTERS] Fortran MEs ( 1 ) : 0.5069s for 90112 events => throughput is 1.78E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -126,22 +126,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975930954] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4229s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3736s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0488s for 8192 events => throughput is 1.68E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4129s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3628s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0496s for 8192 events => throughput is 1.65E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930954) differ by less than 2E-4 (2.3641117063988304e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453331) differ by less than 2E-4 (2.4042469792817656e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -161,22 +162,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383660] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.8077s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5452s for 90112 events => throughput is 1.65E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240835006229] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.8124s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2700s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5419s for 90112 events => throughput is 1.66E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383660) differ by less than 2E-4 (2.2804427679545825e-08) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240835006229) differ by less than 2E-4 (2.2751621031602554e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -184,15 +186,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.584312e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.537103e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.572139e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568161e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -208,22 +211,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955975930958] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4000s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3717s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 8192 events => throughput is 2.94E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3896s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 8192 events => throughput is 3.03E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955975930958) differ by less than 2E-4 (2.364111728603291e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453336) differ by less than 2E-4 (2.404247001486226e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -243,22 +247,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895706383669] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6068s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3000s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3063s for 90112 events => throughput is 2.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 2.034 [2.0342240835006233] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.5854s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2892s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2957s for 90112 events => throughput is 3.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895706383669) differ by less than 2E-4 (2.2804428123635034e-08) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240835006233) differ by less than 2E-4 (2.275162125364716e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -266,15 +271,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.801476e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.861099e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.739519e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.919059e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -290,22 +296,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4107s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3912s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0191s for 8192 events => throughput is 4.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3796s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3626s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 8192 events => throughput is 4.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953696393) differ by less than 2E-4 (2.2538374055969257e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -325,22 +332,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4541s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1842s for 90112 events => throughput is 4.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.034 [2.0342240828564768] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.4551s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2743s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1804s for 90112 events => throughput is 5.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701245432) differ by less than 2E-4 (2.255298392483951e-08) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240828564768) differ by less than 2E-4 (2.243496655118804e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -348,15 +356,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.846731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.594828e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.806331e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.842693e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,22 +381,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953696393] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.29E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3861s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3700s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0157s for 8192 events => throughput is 5.21E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953696393) differ by less than 2E-4 (2.2538374055969257e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -407,22 +417,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701245432] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.4306s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2629s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1673s for 90112 events => throughput is 5.39E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240828564768] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.4459s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2789s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1666s for 90112 events => throughput is 5.41E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701245432) differ by less than 2E-4 (2.255298392483951e-08) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240828564768) differ by less than 2E-4 (2.243496655118804e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -430,15 +441,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.198253e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.190709e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.334338e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.340290e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,22 +466,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955953691082] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4086s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3841s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s + [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.3893s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3665s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.66E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cpp (2.0162955953691082) differ by less than 2E-4 (2.253811048902321e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962970020) differ by less than 2E-4 (2.3968893092529697e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -489,22 +502,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895701243878] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.5232s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2714s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2514s for 90112 events => throughput is 3.58E+05 events/s + [XSECTION] Cross section = 2.034 [2.0342240828564875] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.5317s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2772s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2541s for 90112 events => throughput is 3.55E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cpp (2.0434895701243878) differ by less than 2E-4 (2.255290776354002e-08) +OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240828564875) differ by less than 2E-4 (2.243497188025856e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -512,15 +526,16 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.375382e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.336305e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.300552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.211534e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -535,22 +550,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0162955503257827] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.7989s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.20E+07 events/s + [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.8035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7998s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0162955499256161) and cuda (2.0162955503257827) differ by less than 2E-4 (1.9846613241725208e-10) +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081483021330) differ by less than 2E-4 (1.6201062713605552e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -569,22 +585,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.043 [2.0434895242795732] fbridge_mode=1 - [UNWEIGHT] Wrote 1818 events (found 1823 events) - [COUNTERS] PROGRAM TOTAL : 1.6979s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6904s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [XSECTION] Cross section = 2.034 [2.0342240375655276] fbridge_mode=1 + [UNWEIGHT] Wrote 1858 events (found 1863 events) + [COUNTERS] PROGRAM TOTAL : 1.7165s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7064s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 90112 events => throughput is 9.69E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0434895240377569) and cuda (2.0434895242795732) differ by less than 2E-4 (1.183348974365117e-10) +OK! xsec from fortran (2.0342240372187286) and cuda (2.0342240375655276) differ by less than 2E-4 (1.7048229494776024e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -593,42 +610,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.835154e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.199365e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.144694e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.502648e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.230105e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.857624e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.705062e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.186054e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.235322e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.876331e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.035545e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.585645e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.242431e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.849572e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754474e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.508879e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 5562e4c07e..ca501dcd31 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -2,12 +2,12 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 + +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:52:08 +DATE: 2024-08-22_03:27:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5941s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3442s - [COUNTERS] Fortran MEs ( 1 ) : 2.2499s for 8192 events => throughput is 3.64E+03 events/s + [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 902 events) + [COUNTERS] PROGRAM TOTAL : 2.6268s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s + [COUNTERS] Fortran MEs ( 1 ) : 2.3059s for 8192 events => throughput is 3.55E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.6220s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3462s - [COUNTERS] Fortran MEs ( 1 ) : 2.2759s for 8192 events => throughput is 3.60E+03 events/s + [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 2.5565s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3167s + [COUNTERS] Fortran MEs ( 1 ) : 2.2398s for 8192 events => throughput is 3.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.7017s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8086s - [COUNTERS] Fortran MEs ( 1 ) : 24.8931s for 90112 events => throughput is 3.62E+03 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 26.5314s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7959s + [COUNTERS] Fortran MEs ( 1 ) : 24.7355s for 90112 events => throughput is 3.64E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7821s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3463s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4305s for 8192 events => throughput is 3.37E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + [XSECTION] Cross section = 7.668e-07 [7.6679976028207996E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 2.7394s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3217s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4123s for 8192 events => throughput is 3.40E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207996E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438187E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 28.5017s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7808s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.7158s for 90112 events => throughput is 3.37E+03 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 28.5116s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8255s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.6809s for 90112 events => throughput is 3.38E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438187E-007) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.542884e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560225e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.530103e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560475e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084412E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.6103s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3441s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2634s for 8192 events => throughput is 6.48E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [XSECTION] Cross section = 7.668e-07 [7.6679976028208017E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 1.5913s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2700s for 8192 events => throughput is 6.45E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084412E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028208017E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 15.9197s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7936s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.1234s for 90112 events => throughput is 6.38E+03 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123633E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 15.7013s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7986s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.9000s for 90112 events => throughput is 6.48E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438230E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123633E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.656588e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.736764e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.664988e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.703656e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9116s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3446s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5653s for 8192 events => throughput is 1.45E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8760s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3204s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5540s for 8192 events => throughput is 1.48E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.0033s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7755s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.2261s for 90112 events => throughput is 1.45E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 7.9712s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8047s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1647s for 90112 events => throughput is 1.46E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.485686e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.506854e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.488153e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.513652e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8483s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4991s for 8192 events => throughput is 1.64E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8210s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3243s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4952s for 8192 events => throughput is 1.65E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.2914s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7820s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5079s for 90112 events => throughput is 1.64E+04 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 7.2734s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8012s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.4708s for 90112 events => throughput is 1.65E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.693554e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.648208e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.678028e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.706590e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9859s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6411s for 8192 events => throughput is 1.28E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.9736s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6532s for 8192 events => throughput is 1.25E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 8.8930s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7934s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.0976s for 90112 events => throughput is 1.27E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 8.9590s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8109s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1462s for 90112 events => throughput is 1.26E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.269596e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.277931e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.304260e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.285067e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084454E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8106s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0196s + [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.7965s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7572s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0195s for 8192 events => throughput is 4.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697955084454E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6679976028208006E-007) and cuda (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438198E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4031s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1951s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1884s for 90112 events => throughput is 4.78E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0195s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123665E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.4439s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2302s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1938s for 90112 events => throughput is 4.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0199s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551438198E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6161305627123644E-007) and cuda (7.6161305627123665E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.836004e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.242427e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.223426e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.554723e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.196129e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.857180e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.417377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.230140e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.149870e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.854667e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.416796e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.205345e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.156718e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.855473e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.752894e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.682150e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index e6a1cba79b..5420c542eb 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:54:32 +DATE: 2024-08-22_03:29:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.6010s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3425s - [COUNTERS] Fortran MEs ( 1 ) : 2.2584s for 8192 events => throughput is 3.63E+03 events/s + [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 902 events) + [COUNTERS] PROGRAM TOTAL : 2.5449s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3117s + [COUNTERS] Fortran MEs ( 1 ) : 2.2332s for 8192 events => throughput is 3.67E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.6135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3438s - [COUNTERS] Fortran MEs ( 1 ) : 2.2696s for 8192 events => throughput is 3.61E+03 events/s + [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 2.5504s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3168s + [COUNTERS] Fortran MEs ( 1 ) : 2.2336s for 8192 events => throughput is 3.67E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.5878s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7922s - [COUNTERS] Fortran MEs ( 1 ) : 24.7956s for 90112 events => throughput is 3.63E+03 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 26.4545s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7964s + [COUNTERS] Fortran MEs ( 1 ) : 24.6581s for 90112 events => throughput is 3.65E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896784952157763E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7487s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4000s for 8192 events => throughput is 3.41E+03 events/s + [XSECTION] Cross section = 7.668e-07 [7.6680052401606547E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 2.6995s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3207s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3738s for 8192 events => throughput is 3.45E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896784952157763E-007) differ by less than 4E-4 (1.088869447052332e-06) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680052401606547E-007) differ by less than 4E-4 (9.960018572119367e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668138450782073E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 28.1446s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7932s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.3466s for 90112 events => throughput is 3.42E+03 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161357558617576E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 27.8592s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7949s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.0596s for 90112 events => throughput is 3.46E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668138450782073E-007) differ by less than 4E-4 (7.160651642745819e-07) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161357558617576E-007) differ by less than 4E-4 (6.818619180393171e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.577022e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.591819e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.590866e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592501e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896766542858863E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.0076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3437s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6623s for 8192 events => throughput is 1.24E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [XSECTION] Cross section = 7.668e-07 [7.6680037387484579E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.9731s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3162s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6552s for 8192 events => throughput is 1.25E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896766542858863E-007) differ by less than 4E-4 (8.584556829838164e-07) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680037387484579E-007) differ by less than 4E-4 (8.001994751261066e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668121906848987E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.0575s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7825s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2734s for 90112 events => throughput is 1.24E+04 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161341270162819E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 8.9875s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7891s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1967s for 90112 events => throughput is 1.25E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668121906848987E-007) differ by less than 4E-4 (5.002787206720427e-07) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161341270162819E-007) differ by less than 4E-4 (4.679940670548888e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.265218e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273845e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.265996e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.282434e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6296s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2826s for 8192 events => throughput is 2.90E+04 events/s + [XSECTION] Cross section = 7.668e-07 [7.6680038082583914E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.6009s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3179s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2820s for 8192 events => throughput is 2.90E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680038082583914E-007) differ by less than 4E-4 (8.092644145918371e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.9000s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7718s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1273s for 90112 events => throughput is 2.88E+04 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161368606547695E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 4.9168s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7970s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1187s for 90112 events => throughput is 2.89E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161368606547695E-007) differ by less than 4E-4 (8.269215387990414e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.939784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971452e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.964350e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.958665e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896764408326359E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6110s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2595s for 8192 events => throughput is 3.16E+04 events/s + [XSECTION] Cross section = 7.668e-07 [7.6680038082583914E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.5744s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3150s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2585s for 8192 events => throughput is 3.17E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896764408326359E-007) differ by less than 4E-4 (8.31739528805997e-07) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680038082583914E-007) differ by less than 4E-4 (8.092644145918371e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668124799901306E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 4.6623s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7820s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8794s for 90112 events => throughput is 3.13E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [XSECTION] Cross section = 7.616e-07 [7.6161368606547695E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 4.6744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7952s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8781s for 90112 events => throughput is 3.13E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668124799901306E-007) differ by less than 4E-4 (5.380134884269694e-07) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161368606547695E-007) differ by less than 4E-4 (8.269215387990414e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.263231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.291495e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.247254e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.311470e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896778056937195E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.6684s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3460s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3212s for 8192 events => throughput is 2.55E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + [XSECTION] Cross section = 7.668e-07 [7.6680052283166904E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.6451s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3196s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3242s for 8192 events => throughput is 2.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896778056937195E-007) differ by less than 4E-4 (1.0025677505964836e-06) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680052283166904E-007) differ by less than 4E-4 (9.9445726053915e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668139178203571E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 5.3279s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7717s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.5549s for 90112 events => throughput is 2.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + [XSECTION] Cross section = 7.616e-07 [7.6161383186590445E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 5.3588s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7925s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5652s for 90112 events => throughput is 2.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668139178203571E-007) differ by less than 4E-4 (7.255530953820255e-07) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161383186590445E-007) differ by less than 4E-4 (1.0183578940115012e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.589261e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.561524e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.602723e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.564740e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896802503195373E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8100s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s + [XSECTION] Cross section = 7.668e-07 [7.6680077090677233E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.7948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7580s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 8192 events => throughput is 4.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896802503195373E-007) differ by less than 4E-4 (1.3085410737190273e-06) +OK! xsec from fortran (7.6679976028208006E-007) and cuda (7.6680077090677233E-007) differ by less than 4E-4 (1.3179773190596933e-06) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668190930428073E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.3814s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1945s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1700s for 90112 events => throughput is 5.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0169s + [XSECTION] Cross section = 7.616e-07 [7.6161404612259676E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.4124s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2210s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1742s for 90112 events => throughput is 5.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668190930428073E-007) differ by less than 4E-4 (1.400569635601201e-06) +OK! xsec from fortran (7.6161305627123644E-007) and cuda (7.6161404612259676E-007) differ by less than 4E-4 (1.2996775096141278e-06) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.860775e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.251422e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.139558e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.470233e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.304686e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.297381e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.344126e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.335894e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.335964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.309724e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.345203e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.336754e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.314317e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.306739e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.679665e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.655313e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 7e343e91b1..ba367faa12 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -14,9 +14,9 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:56:30 +DATE: 2024-08-22_03:31:28 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1 events (found 1041 events) - [COUNTERS] PROGRAM TOTAL : 2.5870s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3434s - [COUNTERS] Fortran MEs ( 1 ) : 2.2435s for 8192 events => throughput is 3.65E+03 events/s + [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1 events (found 902 events) + [COUNTERS] PROGRAM TOTAL : 2.5441s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3094s + [COUNTERS] Fortran MEs ( 1 ) : 2.2348s for 8192 events => throughput is 3.67E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697955084444E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.5935s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3429s - [COUNTERS] Fortran MEs ( 1 ) : 2.2507s for 8192 events => throughput is 3.64E+03 events/s + [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 2.5333s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3136s + [COUNTERS] Fortran MEs ( 1 ) : 2.2197s for 8192 events => throughput is 3.69E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551438230E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 26.4482s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7926s - [COUNTERS] Fortran MEs ( 1 ) : 24.6556s for 90112 events => throughput is 3.65E+03 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 26.3540s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7868s + [COUNTERS] Fortran MEs ( 1 ) : 24.5673s for 90112 events => throughput is 3.67E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696375074447E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 2.7899s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3466s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4385s for 8192 events => throughput is 3.36E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s + [XSECTION] Cross section = 7.668e-07 [7.6679974424193742E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 2.7700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3202s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4448s for 8192 events => throughput is 3.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696375074447E-007) differ by less than 2E-4 (1.9775660775600556e-08) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974424193742E-007) differ by less than 2E-4 (2.0918293763827478e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081976882373E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 28.6799s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7926s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.8820s for 90112 events => throughput is 3.35E+03 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161304067553537E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 28.7451s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8261s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.9138s for 90112 events => throughput is 3.35E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081976882373E-007) differ by less than 2E-4 (2.0537305522871918e-08) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304067553537E-007) differ by less than 2E-4 (2.0477197604229502e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.507267e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.369003e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.511786e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.361318e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696285825688E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 1.5883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3421s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2436s for 8192 events => throughput is 6.59E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [XSECTION] Cross section = 7.668e-07 [7.6679974345453326E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 1.5711s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3281s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2400s for 8192 events => throughput is 6.61E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696285825688E-007) differ by less than 2E-4 (2.089271267102788e-08) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974345453326E-007) differ by less than 2E-4 (2.1945164352388247e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668081890954375E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 15.4498s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7701s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.6770s for 90112 events => throughput is 6.59E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [XSECTION] Cross section = 7.616e-07 [7.6161303969775166E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 15.3185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7940s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.5219s for 90112 events => throughput is 6.66E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668081890954375E-007) differ by less than 2E-4 (2.1658084770059816e-08) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161303969775166E-007) differ by less than 2E-4 (2.1761030311040486e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.943689e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.037738e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.925887e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.020726e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9098s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3504s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5576s for 8192 events => throughput is 1.47E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8719s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5530s for 8192 events => throughput is 1.48E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974485677619E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.9207s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7702s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1490s for 90112 events => throughput is 1.47E+04 events/s + [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 7.9219s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8203s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.1001s for 90112 events => throughput is 1.48E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304099640839E-007) differ by less than 2E-4 (2.0055890503911655e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.518105e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.526224e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.514088e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.532099e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8334s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3445s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4873s for 8192 events => throughput is 1.68E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8069s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3190s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4864s for 8192 events => throughput is 1.68E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974485677619E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 7.1725s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7642s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4067s for 90112 events => throughput is 1.67E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 7.3372s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8007s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5349s for 90112 events => throughput is 1.63E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304099640839E-007) differ by less than 2E-4 (2.0055890503911655e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.710218e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.753200e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.722202e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.745221e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896696427369838E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.9928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3430s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6479s for 8192 events => throughput is 1.26E+04 events/s + [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.9868s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3258s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6591s for 8192 events => throughput is 1.24E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cpp (7.9896696427369838E-007) differ by less than 2E-4 (1.9121123240317672e-08) +OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974485677619E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668082030339872E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 9.0659s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7892s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2749s for 90112 events => throughput is 1.24E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 9.0125s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8113s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.1991s for 90112 events => throughput is 1.25E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cpp (7.6668082030339872E-007) differ by less than 2E-4 (1.984004671662376e-08) +OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304099640839E-007) differ by less than 2E-4 (2.0055890503911655e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.210214e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.283850e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.254889e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.289918e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.99e-07 [7.9896697918297644E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 215 events (found 963 events) - [COUNTERS] PROGRAM TOTAL : 0.8127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7760s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 8192 events => throughput is 4.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0195s + [XSECTION] Cross section = 7.668e-07 [7.6679976038108255E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8017s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7621s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0199s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9896697955084444E-007) and cuda (7.9896697918297644E-007) differ by less than 2E-4 (4.6042958334879813e-10) +OK! xsec from fortran (7.6679976028208006E-007) and cuda (7.6679976038108255E-007) differ by less than 2E-4 (1.2911116620273333e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.667e-07 [7.6668083551547592E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1700 events (found 1705 events) - [COUNTERS] PROGRAM TOTAL : 2.4045s - [COUNTERS] Fortran Overhead ( 0 ) : 2.1952s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1896s for 90112 events => throughput is 4.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s + [XSECTION] Cross section = 7.616e-07 [7.6161305624152697E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1833 events (found 1838 events) + [COUNTERS] PROGRAM TOTAL : 2.4327s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2190s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1938s for 90112 events => throughput is 4.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0199s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6668083551438230E-007) and cuda (7.6668083551547592E-007) differ by less than 2E-4 (1.4264145420384011e-12) +OK! xsec from fortran (7.6161305627123644E-007) and cuda (7.6161305624152697E-007) differ by less than 2E-4 (3.9008574148624575e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.814747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.252966e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.187533e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.566943e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.164029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.826739e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.389995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.206857e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.128645e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.830268e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.372948e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.203053e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.119403e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.830312e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.750060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.670866e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 0fe0851e40..70934f5a33 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -3,8 +3,8 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:50:54 +DATE: 2024-08-22_03:25:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 - [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6580s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6494s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.58E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [UNWEIGHT] Wrote 1732 events (found 4297 events) + [COUNTERS] PROGRAM TOTAL : 0.6295s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6211s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3938s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3851s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.50E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3742s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3662s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4272s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3345s - [COUNTERS] Fortran MEs ( 1 ) : 0.0927s for 90112 events => throughput is 9.72E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3817s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2915s + [COUNTERS] Fortran MEs ( 1 ) : 0.0902s for 90112 events => throughput is 9.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.99E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3759s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.70E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4271s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3353s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0914s for 90112 events => throughput is 9.86E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3675s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0887s for 90112 events => throughput is 1.02E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268059) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.006217e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.022296e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.022578e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.016823e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3856s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.89E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3721s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 8192 events => throughput is 1.70E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3937s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0489s for 90112 events => throughput is 1.84E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3157s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2675s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0479s for 90112 events => throughput is 1.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268059) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.897485e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952768e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.985824e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962384e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3921s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3711s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3679s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3531s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3221s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0306s for 90112 events => throughput is 2.95E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3055s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2745s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0305s for 90112 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268065) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.126014e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119431e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.364824e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.291456e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3854s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3759s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.87E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3635s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0295s for 90112 events => throughput is 3.05E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.2976s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2688s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 90112 events => throughput is 3.17E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268065) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.285096e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.232114e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.423598e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.515132e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869291] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3910s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3721s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3687s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156027201869291) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384418] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3563s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3235s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0323s for 90112 events => throughput is 2.79E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3825s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0373s for 90112 events => throughput is 2.42E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098556244384418) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268065) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.866364e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.776132e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.134151e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.951337e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869280] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8152s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.37E+07 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620711] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.8051s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8015s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.72E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cuda (0.31156027201869280) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (0.30590697361620717) and cuda (0.30590697361620711) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384401] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7576s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7518s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0052s for 90112 events => throughput is 1.72E+07 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.7119s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7034s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 90112 events => throughput is 1.14E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cuda (0.31098556244384401) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30752644073268059) and cuda (0.30752644073268059) differ by less than 3E-14 (0.0) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.730366e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.431308e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.967481e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.846495e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.198830e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.587887e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.649618e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.941009e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.170218e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.564624e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.903772e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932049e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.201664e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.607572e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.319844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251419e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 5c4b04cd13..b728cf01ba 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,11 +1,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y @@ -13,10 +13,10 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:51:19 +DATE: 2024-08-22_03:26:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 - [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6497s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6414s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.86E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [UNWEIGHT] Wrote 1732 events (found 4297 events) + [COUNTERS] PROGRAM TOTAL : 0.6316s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6234s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 9.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.4039s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3951s - [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.25E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3771s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3687s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4878s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3911s - [COUNTERS] Fortran MEs ( 1 ) : 0.0967s for 90112 events => throughput is 9.32E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3743s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2848s + [COUNTERS] Fortran MEs ( 1 ) : 0.0896s for 90112 events => throughput is 1.01E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156021439979276] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3975s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3887s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.70E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590691487682503] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3792s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3703s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.67E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021439979276) differ by less than 4E-4 (1.8493660913776466e-07) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691487682503) differ by less than 4E-4 (1.9201713985506075e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098550550786874] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30752638362648882] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3687s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2786s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0898s for 90112 events => throughput is 1.00E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550550786874) differ by less than 4E-4 (1.8308237492714596e-07) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638362648882) differ by less than 4E-4 (1.8569522552969175e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.034265e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.015225e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.024334e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.014615e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156021343761686] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3905s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3875s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.09E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590691359923711] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3718s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3688s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.05E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021343761686) differ by less than 4E-4 (1.8802485879376718e-07) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691359923711) differ by less than 4E-4 (1.961935334193754e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098550488814170] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3711s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0289s for 90112 events => throughput is 3.12E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752638324844145] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.2961s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2660s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0298s for 90112 events => throughput is 3.03E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550488814170) differ by less than 4E-4 (1.8507515886501125e-07) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638324844145) differ by less than 4E-4 (1.8692454217816845e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.288372e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.167979e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.432097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382172e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3889s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3868s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590691606590692] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3694s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.50E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021516056748) differ by less than 4E-4 (1.8249478717091705e-07) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691606590692) differ by less than 4E-4 (1.881300696338073e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3432s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3229s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 90112 events => throughput is 4.50E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752638419881734] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.2971s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 90112 events => throughput is 4.49E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550596898289) differ by less than 4E-4 (1.815996238940798e-07) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638419881734) differ by less than 4E-4 (1.8383415467670972e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.077269e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.033271e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.403997e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.297828e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156021516056748] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3869s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3848s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.55E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590691606590692] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3746s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.87E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021516056748) differ by less than 4E-4 (1.8249478717091705e-07) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691606590692) differ by less than 4E-4 (1.881300696338073e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098550596898289] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3387s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0187s for 90112 events => throughput is 4.81E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752638419881734] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.2872s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2681s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 90112 events => throughput is 4.79E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098550596898289) differ by less than 4E-4 (1.815996238940798e-07) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638419881734) differ by less than 4E-4 (1.8383415467670972e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.322495e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.272138e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.427973e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513966e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156021917867366] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3878s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3853s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.78E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590692025204030] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3700s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3673s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.68E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156021917867366) differ by less than 4E-4 (1.695980652582918e-07) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590692025204030) differ by less than 4E-4 (1.744457350794093e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098551029624061] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3406s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3185s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 90112 events => throughput is 4.14E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3075 [0.30752638860024578] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.2904s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2680s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0220s for 90112 events => throughput is 4.10E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098551029624061) differ by less than 4E-4 (1.6768496602370675e-07) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638860024578) differ by less than 4E-4 (1.6952179682228063e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.424607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.412227e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.888963e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.848494e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156022290359153] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8154s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.46E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [XSECTION] Cross section = 0.3059 [0.30590692347055715] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.8035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cuda (0.31156022290359153) differ by less than 4E-4 (1.576423758198331e-07) +OK! xsec from fortran (0.30590697361620717) and cuda (0.30590692347055715) differ by less than 4E-4 (1.63924507634583e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098551341908548] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7464s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7407s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0049s for 90112 events => throughput is 1.85E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [XSECTION] Cross section = 0.3075 [0.30752639119626163] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.7078s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6994s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.16E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cuda (0.31098551341908548) differ by less than 4E-4 (1.5764319793998283e-07) +OK! xsec from fortran (0.30752644073268059) and cuda (0.30752639119626163) differ by less than 4E-4 (1.6108019473826118e-07) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.032627e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.509110e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.278657e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.721348e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.543019e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.896109e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.578539e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.210427e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.555176e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.823420e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.658200e+09 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.187530e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.883073e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.474730e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.705532e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.680141e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 62624c2c92..c19c807968 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=cuda - -make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:51:44 +DATE: 2024-08-22_03:26:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 - [UNWEIGHT] Wrote 1767 events (found 4306 events) - [COUNTERS] PROGRAM TOTAL : 0.6493s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6409s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.81E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [UNWEIGHT] Wrote 1732 events (found 4297 events) + [COUNTERS] PROGRAM TOTAL : 0.6345s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6262s + [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027201869302] fbridge_mode=0 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3992s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3909s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3678s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.33E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556244384407] fbridge_mode=0 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4133s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3208s - [COUNTERS] Fortran MEs ( 1 ) : 0.0925s for 90112 events => throughput is 9.75E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3772s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2870s + [COUNTERS] Fortran MEs ( 1 ) : 0.0902s for 90112 events => throughput is 9.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3950s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3864s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 9.94E+05 events/s + [XSECTION] Cross section = 0.3059 [0.30590698182878584] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3757s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.66E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028014369008) differ by less than 2E-4 (2.6078411874408403e-08) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698182878584) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.4087s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3177s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 90112 events => throughput is 9.95E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30752644886297176] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3661s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0908s for 90112 events => throughput is 9.93E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557069460298) differ by less than 2E-4 (2.6531003172181045e-08) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644886297176) differ by less than 2E-4 (2.6437698030790102e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.803386e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.924593e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.910254e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.937019e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156028014369008] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3923s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3874s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.82E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590698182878584] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3724s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3675s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.85E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028014369008) differ by less than 2E-4 (2.6078411874408403e-08) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698182878584) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098557069460298] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3653s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3175s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0474s for 90112 events => throughput is 1.90E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752644886297176] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3109s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2618s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0486s for 90112 events => throughput is 1.85E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557069460298) differ by less than 2E-4 (2.6531003172181045e-08) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644886297176) differ by less than 2E-4 (2.6437698030790102e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.964224e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.895255e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.028853e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.012680e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3954s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3923s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.03E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3741s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.09E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028097537258) differ by less than 2E-4 (2.8747823010988327e-08) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698277712874) differ by less than 2E-4 (2.994675618595011e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3415s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3131s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 90112 events => throughput is 3.22E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.2948s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0292s for 90112 events => throughput is 3.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557141632605) differ by less than 2E-4 (2.8851763866910574e-08) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644968184478) differ by less than 2E-4 (2.9100470699816583e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.237365e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235450e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.416021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.403824e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3940s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3909s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.04E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3706s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3676s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.13E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028097537258) differ by less than 2E-4 (2.8747823010988327e-08) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698277712874) differ by less than 2E-4 (2.994675618595011e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3467s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3184s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0279s for 90112 events => throughput is 3.23E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3026s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0283s for 90112 events => throughput is 3.19E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557141632605) differ by less than 2E-4 (2.8851763866910574e-08) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644968184478) differ by less than 2E-4 (2.9100470699816583e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.347126e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352796e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.589308e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508682e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156028097537258] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s + [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.3718s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.77E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cpp (0.31156028097537258) differ by less than 2E-4 (2.8747823010988327e-08) +OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698277712874) differ by less than 2E-4 (2.994675618595011e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098557141632605] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.3501s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3186s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 90112 events => throughput is 2.90E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.3059s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2738s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 90112 events => throughput is 2.85E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cpp (0.31098557141632605) differ by less than 2E-4 (2.8851763866910574e-08) +OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644968184478) differ by less than 2E-4 (2.9100470699816583e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.904623e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998636e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.114835e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.181761e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3116 [0.31156027194560187] fbridge_mode=1 - [UNWEIGHT] Wrote 1636 events (found 1641 events) - [COUNTERS] PROGRAM TOTAL : 0.8152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8140s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.39E+07 events/s + [XSECTION] Cross section = 0.3059 [0.30590697378458576] fbridge_mode=1 + [UNWEIGHT] Wrote 1609 events (found 1614 events) + [COUNTERS] PROGRAM TOTAL : 0.8091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8058s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31156027201869302) and cuda (0.31156027194560187) differ by less than 2E-4 (2.345971195083507e-10) +OK! xsec from fortran (0.30590697361620717) and cuda (0.30590697378458576) differ by less than 2E-4 (5.504241507026109e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.311 [0.31098556243340819] fbridge_mode=1 - [UNWEIGHT] Wrote 1828 events (found 1833 events) - [COUNTERS] PROGRAM TOTAL : 1.7501s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7444s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 90112 events => throughput is 1.75E+07 events/s + [XSECTION] Cross section = 0.3075 [0.30752644069868873] fbridge_mode=1 + [UNWEIGHT] Wrote 1788 events (found 1793 events) + [COUNTERS] PROGRAM TOTAL : 1.7170s + [COUNTERS] Fortran Overhead ( 0 ) : 1.7082s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.11E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.31098556244384407) and cuda (0.31098556243340819) differ by less than 2E-4 (3.3557379097715057e-11) +OK! xsec from fortran (0.30752644073268059) and cuda (0.30752644069868873) differ by less than 2E-4 (1.1053313819786581e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.842332e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.284948e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.019027e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.816586e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.214756e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.563395e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.517612e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.965519e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.171297e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.627148e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.740991e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966795e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.214875e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604319e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.310258e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.244921e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 6131633fdd..223cb89e40 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,22 +1,22 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:49:37 +DATE: 2024-08-22_03:24:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 - [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8016s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7599s - [COUNTERS] Fortran MEs ( 1 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [UNWEIGHT] Wrote 2625 events (found 5368 events) + [COUNTERS] PROGRAM TOTAL : 0.7783s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7375s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4173s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s - [COUNTERS] Fortran MEs ( 1 ) : 0.0415s for 8192 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4046s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3636s + [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6984s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2478s - [COUNTERS] Fortran MEs ( 1 ) : 0.4506s for 90112 events => throughput is 2.00E+05 events/s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.7079s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2586s + [COUNTERS] Fortran MEs ( 1 ) : 0.4493s for 90112 events => throughput is 2.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419863] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0438s for 8192 events => throughput is 1.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.69 [44.690956764848757] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4106s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419863) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848757) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7366s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2536s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4825s for 90112 events => throughput is 1.87E+05 events/s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.7289s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4734s for 90112 events => throughput is 1.90E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.880754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.902843e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.882930e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.876183e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3713s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 8192 events => throughput is 3.37E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3943s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.45E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848742) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256471] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5199s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2483s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2711s for 90112 events => throughput is 3.32E+05 events/s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.5124s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2655s for 90112 events => throughput is 3.39E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256471) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.302363e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.349577e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.365112e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.361639e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3924s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0155s for 8192 events => throughput is 5.28E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3846s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0149s for 8192 events => throughput is 5.51E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848735) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4183s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2503s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1675s for 90112 events => throughput is 5.38E+05 events/s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.4238s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2562s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1672s for 90112 events => throughput is 5.39E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.278183e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.326838e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.374748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.346593e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3894s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0136s for 8192 events => throughput is 6.02E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3794s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3653s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0137s for 8192 events => throughput is 5.96E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848735) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2454s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1520s for 90112 events => throughput is 5.93E+05 events/s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.4133s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1518s for 90112 events => throughput is 5.94E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.775498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.665657e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.841522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.840518e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4047s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3821s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0222s for 8192 events => throughput is 3.70E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0218s for 8192 events => throughput is 3.76E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598860065419856) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848735) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4927s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2545s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2377s for 90112 events => throughput is 3.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.4886s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2539s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2342s for 90112 events => throughput is 3.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.798876e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.589281e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.612840e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.561336e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419849] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.24E+07 events/s + [XSECTION] Cross section = 44.69 [44.690956764848728] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.7974s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7937s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.82E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cuda (44.598860065419849) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.690956764848742) and cuda (44.690956764848728) differ by less than 3E-14 (3.3306690738754696e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256485] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6862s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.37E+07 events/s + [XSECTION] Cross section = 44.41 [44.411959203413069] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.6943s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6845s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 90112 events => throughput is 9.96E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cuda (44.577523870256485) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (44.411959203413062) and cuda (44.411959203413069) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.869432e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299319e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.714086e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.562666e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.311155e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.907907e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.083882e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.696731e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.322734e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.935865e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.159310e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.015733e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.296675e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.923062e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.098537e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.715601e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 58b86df658..b2e100edc8 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -2,11 +2,11 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/s make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -14,13 +14,13 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cpp512z make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:50:03 +DATE: 2024-08-22_03:25:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 - [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8051s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7635s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [UNWEIGHT] Wrote 2625 events (found 5368 events) + [COUNTERS] PROGRAM TOTAL : 0.7803s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7389s + [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4148s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3740s - [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3616s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7188s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2615s - [COUNTERS] Fortran MEs ( 1 ) : 0.4573s for 90112 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.7586s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2978s + [COUNTERS] Fortran MEs ( 1 ) : 0.4608s for 90112 events => throughput is 1.96E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598853620719339] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3751s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.69 [44.690951135742296] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4293s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3867s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598853620719339) differ by less than 4E-4 (1.4450370500185272e-07) +OK! xsec from fortran (44.690956764848742) and cpp (44.690951135742296) differ by less than 4E-4 (1.2595627518763308e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577522280119403] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7041s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4538s for 90112 events => throughput is 1.99E+05 events/s + [XSECTION] Cross section = 44.41 [44.411953404075810] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.7195s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4515s for 90112 events => throughput is 2.00E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577522280119403) differ by less than 4E-4 (3.567127371262302e-08) +OK! xsec from fortran (44.411959203413062) and cpp (44.411953404075810) differ by less than 4E-4 (1.305805318319031e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.004528e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.018305e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.989674e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.967614e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598849697851406] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3933s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3758s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0172s for 8192 events => throughput is 4.76E+05 events/s + [XSECTION] Cross section = 44.69 [44.690947248027847] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3658s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.86E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598849697851406) differ by less than 4E-4 (2.3246263325393812e-07) +OK! xsec from fortran (44.690956764848742) and cpp (44.690947248027847) differ by less than 4E-4 (2.1294735186305758e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577518590213366] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4571s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1866s for 90112 events => throughput is 4.83E+05 events/s + [XSECTION] Cross section = 44.41 [44.411949727730686] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.4415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1830s for 90112 events => throughput is 4.92E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577518590213366) differ by less than 4E-4 (1.1844630731783212e-07) +OK! xsec from fortran (44.411959203413062) and cpp (44.411949727730686) differ by less than 4E-4 (2.1335880118211747e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.766493e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.788292e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.711541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.716767e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3932s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3838s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 8192 events => throughput is 8.99E+05 events/s + [XSECTION] Cross section = 44.69 [44.690947419182343] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3758s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598850036412124) differ by less than 4E-4 (2.2487139172966408e-07) +OK! xsec from fortran (44.690956764848742) and cpp (44.690947419182343) differ by less than 4E-4 (2.0911761744457635e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3456s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2495s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0958s for 90112 events => throughput is 9.40E+05 events/s + [XSECTION] Cross section = 44.41 [44.411949260921247] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.3601s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2622s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0976s for 90112 events => throughput is 9.23E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577518612400254) differ by less than 4E-4 (1.1794859255953583e-07) +OK! xsec from fortran (44.411959203413062) and cpp (44.411949260921247) differ by less than 4E-4 (2.238696962253428e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.204759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.159373e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.210555e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.212730e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598850036412124] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3855s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.85E+05 events/s + [XSECTION] Cross section = 44.69 [44.690947419182343] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3724s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3634s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.44E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598850036412124) differ by less than 4E-4 (2.2487139172966408e-07) +OK! xsec from fortran (44.690956764848742) and cpp (44.690947419182343) differ by less than 4E-4 (2.0911761744457635e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577518612400254] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3394s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2483s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0908s for 90112 events => throughput is 9.92E+05 events/s + [XSECTION] Cross section = 44.41 [44.411949260921247] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.3449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2530s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0916s for 90112 events => throughput is 9.84E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577518612400254) differ by less than 4E-4 (1.1794859255953583e-07) +OK! xsec from fortran (44.411959203413062) and cpp (44.411949260921247) differ by less than 4E-4 (2.238696962253428e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.706656e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.712881e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.233766e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.945039e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598854350242270] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3868s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.03E+05 events/s + [XSECTION] Cross section = 44.69 [44.690951463003671] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3793s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0109s for 8192 events => throughput is 7.53E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598854350242270) differ by less than 4E-4 (1.2814627048385319e-07) +OK! xsec from fortran (44.690956764848742) and cpp (44.690951463003671) differ by less than 4E-4 (1.1863351012664225e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577522751628507] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.3825s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2565s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1256s for 90112 events => throughput is 7.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.41 [44.411953494761157] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.3787s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2531s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1252s for 90112 events => throughput is 7.20E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577522751628507) differ by less than 4E-4 (2.5093990219104967e-08) +OK! xsec from fortran (44.411959203413062) and cpp (44.411953494761157) differ by less than 4E-4 (1.2853861908190822e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.942843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.999423e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.910825e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.895597e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598870301426373] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8091s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8078s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0006s for 8192 events => throughput is 1.43E+07 events/s + [XSECTION] Cross section = 44.69 [44.690956060520207] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.7963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7928s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.90E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cuda (44.598870301426373) differ by less than 4E-4 (2.2951273881410827e-07) +OK! xsec from fortran (44.690956764848742) and cuda (44.690956060520207) differ by less than 4E-4 (1.575997887748315e-08) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577527268256027] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7098s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7033s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 90112 events => throughput is 1.56E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 44.41 [44.411957327369002] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.6921s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6834s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 90112 events => throughput is 1.11E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cuda (44.577527268256027) differ by less than 4E-4 (7.622674558227516e-08) +OK! xsec from fortran (44.411959203413062) and cuda (44.411957327369002) differ by less than 4E-4 (4.22418666712332e-08) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.705094e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.399026e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.269887e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.601106e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.888199e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.120477e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.391800e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.431044e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.898622e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.154807e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.539526e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.369838e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.473018e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.738148e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.495430e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.056397e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 75d0c77429..f71519e4b2 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 + make USEBUILDDIR=1 BACKEND=cpp512y make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -20,8 +20,8 @@ make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' @@ -32,7 +32,7 @@ make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ OMP_NUM_THREADS= -DATE: 2024-08-09_00:50:28 +DATE: 2024-08-22_03:25:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 - [UNWEIGHT] Wrote 2620 events (found 5403 events) - [COUNTERS] PROGRAM TOTAL : 0.8208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7796s - [COUNTERS] Fortran MEs ( 1 ) : 0.0412s for 8192 events => throughput is 1.99E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [UNWEIGHT] Wrote 2625 events (found 5368 events) + [COUNTERS] PROGRAM TOTAL : 0.7988s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7579s + [COUNTERS] Fortran MEs ( 1 ) : 0.0409s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860065419856] fbridge_mode=0 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4160s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4045s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3630s + [COUNTERS] Fortran MEs ( 1 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523870256456] fbridge_mode=0 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7104s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2559s - [COUNTERS] Fortran MEs ( 1 ) : 0.4544s for 90112 events => throughput is 1.98E+05 events/s + [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.7188s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2686s + [COUNTERS] Fortran MEs ( 1 ) : 0.4502s for 90112 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -125,22 +125,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.4204s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0450s for 8192 events => throughput is 1.82E+05 events/s + [XSECTION] Cross section = 44.69 [44.690958008771119] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.4129s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3682s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598861353577519) differ by less than 2E-4 (2.888319694527297e-08) +OK! xsec from fortran (44.690956764848742) and cpp (44.690958008771119) differ by less than 2E-4 (2.783387209603916e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,22 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577525144126803] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.7448s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4867s for 90112 events => throughput is 1.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.41 [44.411960462697984] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.7399s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2583s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4812s for 90112 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577525144126803) differ by less than 2E-4 (2.8576516486467085e-08) +OK! xsec from fortran (44.411959203413062) and cpp (44.411960462697984) differ by less than 2E-4 (2.835463575046049e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -183,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.873127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.903043e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.868412e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -203,22 +205,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598861353577519] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0244s for 8192 events => throughput is 3.36E+05 events/s + [XSECTION] Cross section = 44.69 [44.690958008771119] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3649s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0238s for 8192 events => throughput is 3.44E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598861353577519) differ by less than 2E-4 (2.888319694527297e-08) +OK! xsec from fortran (44.690956764848742) and cpp (44.690958008771119) differ by less than 2E-4 (2.783387209603916e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -237,22 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577525144126810] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.5269s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2579s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2686s for 90112 events => throughput is 3.35E+05 events/s + [XSECTION] Cross section = 44.41 [44.411960462697984] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.5275s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2648s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2623s for 90112 events => throughput is 3.44E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577525144126810) differ by less than 2E-4 (2.857651670851169e-08) +OK! xsec from fortran (44.411959203413062) and cpp (44.411960462697984) differ by less than 2E-4 (2.835463575046049e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -261,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.333942e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.387840e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.376975e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.398750e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -281,22 +285,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.37E+05 events/s + [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3806s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3654s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0148s for 8192 events => throughput is 5.54E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598861344883289) differ by less than 2E-4 (2.868825421664667e-08) +OK! xsec from fortran (44.690956764848742) and cpp (44.690958040780203) differ by less than 2E-4 (2.8550103836622043e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -315,22 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4173s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2508s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1662s for 90112 events => throughput is 5.42E+05 events/s + [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.4192s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2545s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1642s for 90112 events => throughput is 5.49E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577525178109212) differ by less than 2E-4 (2.9338838025694258e-08) +OK! xsec from fortran (44.411959203413062) and cpp (44.411960501666542) differ by less than 2E-4 (2.923206943172829e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -339,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.335642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.285681e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.330908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.379658e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -359,22 +365,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3897s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.74E+05 events/s + [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3823s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0141s for 8192 events => throughput is 5.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598861344883289) differ by less than 2E-4 (2.868825421664667e-08) +OK! xsec from fortran (44.690956764848742) and cpp (44.690958040780203) differ by less than 2E-4 (2.8550103836622043e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -393,22 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4068s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2528s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1536s for 90112 events => throughput is 5.87E+05 events/s + [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.4079s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1539s for 90112 events => throughput is 5.85E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577525178109212) differ by less than 2E-4 (2.9338838025694258e-08) +OK! xsec from fortran (44.411959203413062) and cpp (44.411960501666542) differ by less than 2E-4 (2.923206943172829e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -417,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.855366e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.964392e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.947430e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.009824e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -437,22 +445,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598861344883289] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.3995s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3772s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.75E+05 events/s + [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.3899s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cpp (44.598861344883289) differ by less than 2E-4 (2.868825421664667e-08) +OK! xsec from fortran (44.690956764848742) and cpp (44.690958040780203) differ by less than 2E-4 (2.8550103836622043e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -471,22 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577525178109212] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.4943s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2580s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2358s for 90112 events => throughput is 3.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.4849s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2550s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2294s for 90112 events => throughput is 3.93E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cpp (44.577525178109212) differ by less than 2E-4 (2.9338838025694258e-08) +OK! xsec from fortran (44.411959203413062) and cpp (44.411960501666542) differ by less than 2E-4 (2.923206943172829e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -495,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.733262e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.850222e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.702855e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.898534e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -515,22 +525,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.6 [44.598860056955807] fbridge_mode=1 - [UNWEIGHT] Wrote 1603 events (found 1608 events) - [COUNTERS] PROGRAM TOTAL : 0.8053s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8039s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0007s for 8192 events => throughput is 1.21E+07 events/s + [XSECTION] Cross section = 44.69 [44.690956743203600] fbridge_mode=1 + [UNWEIGHT] Wrote 1616 events (found 1621 events) + [COUNTERS] PROGRAM TOTAL : 0.7956s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7919s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.79E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.598860065419856) and cuda (44.598860056955807) differ by less than 2E-4 (1.8978174587402918e-10) +OK! xsec from fortran (44.690956764848742) and cuda (44.690956743203600) differ by less than 2E-4 (4.843293543999039e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -549,22 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.58 [44.577523872560512] fbridge_mode=1 - [UNWEIGHT] Wrote 1743 events (found 1748 events) - [COUNTERS] PROGRAM TOTAL : 1.6927s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6853s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 90112 events => throughput is 1.36E+07 events/s + [XSECTION] Cross section = 44.41 [44.411959201897950] fbridge_mode=1 + [UNWEIGHT] Wrote 1775 events (found 1780 events) + [COUNTERS] PROGRAM TOTAL : 1.6977s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6877s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 90112 events => throughput is 9.74E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.577523870256456) and cuda (44.577523872560512) differ by less than 2E-4 (5.168643291142416e-11) +OK! xsec from fortran (44.411959203413062) and cuda (44.411959201897950) differ by less than 2E-4 (3.411493310068181e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -573,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.871837e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.155579e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.622666e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432618e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.299743e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.917316e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.055606e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.497523e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.302003e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.925954e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.140289e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.067763e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.319830e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.932333e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.983678e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742049e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) ***